gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "sched-int.h"
54 #include "target-def.h"
55 #include "debug.h"
56 #include "langhooks.h"
57 #include "df.h"
58 #include "intl.h"
59 #include "libfuncs.h"
60 #include "params.h"
61 #include "opts.h"
62 #include "dumpfile.h"
64 /* Forward definitions of types. */
65 typedef struct minipool_node Mnode;
66 typedef struct minipool_fixup Mfix;
68 void (*arm_lang_output_object_attributes_hook)(void);
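/* Holds the (up to four) immediates of a constant-building sequence;
   see optimal_immediate_sequence / optimal_immediate_sequence_1 below.  */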
70 struct four_ints
71 {
72   int i[4];
73 };
75 /* Forward function declarations. */
76 static bool arm_const_not_ok_for_debug_p (rtx);
77 static bool arm_lra_p (void);
78 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
79 static int arm_compute_static_chain_stack_bytes (void);
80 static arm_stack_offsets *arm_get_frame_offsets (void);
81 static void arm_add_gc_roots (void);
82 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
83 HOST_WIDE_INT, rtx, rtx, int, int);
84 static unsigned bit_count (unsigned long);
85 static int arm_address_register_rtx_p (rtx, int);
86 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
87 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
88 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
89 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
90 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
91 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
92 inline static int thumb1_index_register_rtx_p (rtx, int);
93 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
94 static int thumb_far_jump_used_p (void);
95 static bool thumb_force_lr_save (void);
96 static unsigned arm_size_return_regs (void);
97 static bool arm_assemble_integer (rtx, unsigned int, int);
98 static void arm_print_operand (FILE *, rtx, int);
99 static void arm_print_operand_address (FILE *, rtx);
100 static bool arm_print_operand_punct_valid_p (unsigned char code);
101 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
102 static arm_cc get_arm_condition_code (rtx);
103 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
104 static const char *output_multi_immediate (rtx *, const char *, const char *,
105 int, HOST_WIDE_INT);
106 static const char *shift_op (rtx, HOST_WIDE_INT *);
107 static struct machine_function *arm_init_machine_status (void);
108 static void thumb_exit (FILE *, int);
109 static HOST_WIDE_INT get_jump_table_size (rtx);
110 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
111 static Mnode *add_minipool_forward_ref (Mfix *);
112 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
113 static Mnode *add_minipool_backward_ref (Mfix *);
114 static void assign_minipool_offsets (Mfix *);
115 static void arm_print_value (FILE *, rtx);
116 static void dump_minipool (rtx);
117 static int arm_barrier_cost (rtx);
118 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
119 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
120 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
121 rtx);
122 static void arm_reorg (void);
123 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
124 static unsigned long arm_compute_save_reg0_reg12_mask (void);
125 static unsigned long arm_compute_save_reg_mask (void);
126 static unsigned long arm_isr_value (tree);
127 static unsigned long arm_compute_func_type (void);
128 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
129 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
130 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
131 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
132 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
133 #endif
134 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
135 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
136 static int arm_comp_type_attributes (const_tree, const_tree);
137 static void arm_set_default_type_attributes (tree);
138 static int arm_adjust_cost (rtx, rtx, rtx, int);
139 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
140 static int optimal_immediate_sequence (enum rtx_code code,
141 unsigned HOST_WIDE_INT val,
142 struct four_ints *return_sequence);
143 static int optimal_immediate_sequence_1 (enum rtx_code code,
144 unsigned HOST_WIDE_INT val,
145 struct four_ints *return_sequence,
146 int i);
147 static int arm_get_strip_length (int);
148 static bool arm_function_ok_for_sibcall (tree, tree);
149 static enum machine_mode arm_promote_function_mode (const_tree,
150 enum machine_mode, int *,
151 const_tree, int);
152 static bool arm_return_in_memory (const_tree, const_tree);
153 static rtx arm_function_value (const_tree, const_tree, bool);
154 static rtx arm_libcall_value_1 (enum machine_mode);
155 static rtx arm_libcall_value (enum machine_mode, const_rtx);
156 static bool arm_function_value_regno_p (const unsigned int);
157 static void arm_internal_label (FILE *, const char *, unsigned long);
158 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
159 tree);
160 static bool arm_have_conditional_execution (void);
161 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
162 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
163 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
164 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
165 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
166 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
167 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
168 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
169 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
170 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
171 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
172 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
173 static void arm_init_builtins (void);
174 static void arm_init_iwmmxt_builtins (void);
175 static rtx safe_vector_operand (rtx, enum machine_mode);
176 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
177 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
178 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
179 static tree arm_builtin_decl (unsigned, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
188 const_tree, bool);
189 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
190 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
191 const_tree);
192 static rtx aapcs_libcall_value (enum machine_mode);
193 static int aapcs_select_return_coproc (const_tree, const_tree);
195 #ifdef OBJECT_FORMAT_ELF
196 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
197 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
198 #endif
199 #ifndef ARM_PE
200 static void arm_encode_section_info (tree, rtx, int);
201 #endif
203 static void arm_file_end (void);
204 static void arm_file_start (void);
206 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
207 tree, int *, int);
208 static bool arm_pass_by_reference (cumulative_args_t,
209 enum machine_mode, const_tree, bool);
210 static bool arm_promote_prototypes (const_tree);
211 static bool arm_default_short_enums (void);
212 static bool arm_align_anon_bitfield (void);
213 static bool arm_return_in_msb (const_tree);
214 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
215 static bool arm_return_in_memory (const_tree, const_tree);
216 #if ARM_UNWIND_INFO
217 static void arm_unwind_emit (FILE *, rtx);
218 static bool arm_output_ttype (rtx);
219 static void arm_asm_emit_except_personality (rtx);
220 static void arm_asm_init_sections (void);
221 #endif
222 static rtx arm_dwarf_register_span (rtx);
224 static tree arm_cxx_guard_type (void);
225 static bool arm_cxx_guard_mask_bit (void);
226 static tree arm_get_cookie_size (tree);
227 static bool arm_cookie_has_size (void);
228 static bool arm_cxx_cdtor_returns_this (void);
229 static bool arm_cxx_key_method_may_be_inline (void);
230 static void arm_cxx_determine_class_data_visibility (tree);
231 static bool arm_cxx_class_data_always_comdat (void);
232 static bool arm_cxx_use_aeabi_atexit (void);
233 static void arm_init_libfuncs (void);
234 static tree arm_build_builtin_va_list (void);
235 static void arm_expand_builtin_va_start (tree, rtx);
236 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
237 static void arm_option_override (void);
238 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
239 static bool arm_cannot_copy_insn_p (rtx);
240 static int arm_issue_rate (void);
241 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
242 static bool arm_output_addr_const_extra (FILE *, rtx);
243 static bool arm_allocate_stack_slots_for_args (void);
244 static bool arm_warn_func_return (tree);
245 static const char *arm_invalid_parameter_type (const_tree t);
246 static const char *arm_invalid_return_type (const_tree t);
247 static tree arm_promoted_type (const_tree t);
248 static tree arm_convert_to_type (tree type, tree expr);
249 static bool arm_scalar_mode_supported_p (enum machine_mode);
250 static bool arm_frame_pointer_required (void);
251 static bool arm_can_eliminate (const int, const int);
252 static void arm_asm_trampoline_template (FILE *);
253 static void arm_trampoline_init (rtx, tree, rtx);
254 static rtx arm_trampoline_adjust_address (rtx);
255 static rtx arm_pic_static_addr (rtx orig, rtx reg);
256 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
257 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
258 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
259 static bool arm_array_mode_supported_p (enum machine_mode,
260 unsigned HOST_WIDE_INT);
261 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
262 static bool arm_class_likely_spilled_p (reg_class_t);
263 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
264 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
265 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
266 const_tree type,
267 int misalignment,
268 bool is_packed);
269 static void arm_conditional_register_usage (void);
270 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
271 static unsigned int arm_autovectorize_vector_sizes (void);
272 static int arm_default_branch_cost (bool, bool);
273 static int arm_cortex_a5_branch_cost (bool, bool);
274 static int arm_cortex_m_branch_cost (bool, bool);
276 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
277 const unsigned char *sel);
279 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
280 tree vectype,
281 int misalign ATTRIBUTE_UNUSED);
282 static unsigned arm_add_stmt_cost (void *data, int count,
283 enum vect_cost_for_stmt kind,
284 struct _stmt_vec_info *stmt_info,
285 int misalign,
286 enum vect_cost_model_location where);
288 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
289 bool op0_preserve_value);
290 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
292 /* Table of machine attributes. */
293 static const struct attribute_spec arm_attribute_table[] =
294 {
295 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
296 affects_type_identity } */
297 /* Function calls made to this symbol must be done indirectly, because
298 it may lie outside of the 26 bit addressing range of a normal function
299 call. */
300 { "long_call", 0, 0, false, true, true, NULL, false },
301 /* Whereas these functions are always known to reside within the 26 bit
302 addressing range. */
303 { "short_call", 0, 0, false, true, true, NULL, false },
304 /* Specify the procedure call conventions for a function. */
305 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
306 false },
307 /* Interrupt Service Routines have special prologue and epilogue requirements. */
308 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
309 false },
310 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
311 false },
312 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
313 false },
314 #ifdef ARM_PE
315 /* ARM/PE has three new attributes:
316 interfacearm - ?
317 dllexport - for exporting a function/variable that will live in a dll
318 dllimport - for importing a function/variable from a dll
320 Microsoft allows multiple declspecs in one __declspec, separating
321 them with spaces. We do NOT support this. Instead, use __declspec
322 multiple times.
323 */
324 { "dllimport", 0, 0, true, false, false, NULL, false },
325 { "dllexport", 0, 0, true, false, false, NULL, false },
326 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
327 false },
328 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
329 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
330 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
331 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
332 false },
333 #endif
334 { NULL, 0, 0, false, false, false, NULL, false }
335 };
337 /* Initialize the GCC target structure. */
338 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
339 #undef TARGET_MERGE_DECL_ATTRIBUTES
340 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
341 #endif
343 #undef TARGET_LEGITIMIZE_ADDRESS
344 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
346 #undef TARGET_LRA_P
347 #define TARGET_LRA_P arm_lra_p
349 #undef TARGET_ATTRIBUTE_TABLE
350 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
352 #undef TARGET_ASM_FILE_START
353 #define TARGET_ASM_FILE_START arm_file_start
354 #undef TARGET_ASM_FILE_END
355 #define TARGET_ASM_FILE_END arm_file_end
357 #undef TARGET_ASM_ALIGNED_SI_OP
358 #define TARGET_ASM_ALIGNED_SI_OP NULL
359 #undef TARGET_ASM_INTEGER
360 #define TARGET_ASM_INTEGER arm_assemble_integer
362 #undef TARGET_PRINT_OPERAND
363 #define TARGET_PRINT_OPERAND arm_print_operand
364 #undef TARGET_PRINT_OPERAND_ADDRESS
365 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
366 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
367 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
369 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
370 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
372 #undef TARGET_ASM_FUNCTION_PROLOGUE
373 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
375 #undef TARGET_ASM_FUNCTION_EPILOGUE
376 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
378 #undef TARGET_OPTION_OVERRIDE
379 #define TARGET_OPTION_OVERRIDE arm_option_override
381 #undef TARGET_COMP_TYPE_ATTRIBUTES
382 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
384 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
385 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
387 #undef TARGET_SCHED_ADJUST_COST
388 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
390 #undef TARGET_SCHED_REORDER
391 #define TARGET_SCHED_REORDER arm_sched_reorder
393 #undef TARGET_REGISTER_MOVE_COST
394 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
396 #undef TARGET_MEMORY_MOVE_COST
397 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
399 #undef TARGET_ENCODE_SECTION_INFO
400 #ifdef ARM_PE
401 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
402 #else
403 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
404 #endif
406 #undef TARGET_STRIP_NAME_ENCODING
407 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
409 #undef TARGET_ASM_INTERNAL_LABEL
410 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
412 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
413 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
415 #undef TARGET_FUNCTION_VALUE
416 #define TARGET_FUNCTION_VALUE arm_function_value
418 #undef TARGET_LIBCALL_VALUE
419 #define TARGET_LIBCALL_VALUE arm_libcall_value
421 #undef TARGET_FUNCTION_VALUE_REGNO_P
422 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
424 #undef TARGET_ASM_OUTPUT_MI_THUNK
425 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
426 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
427 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
429 #undef TARGET_RTX_COSTS
430 #define TARGET_RTX_COSTS arm_rtx_costs
431 #undef TARGET_ADDRESS_COST
432 #define TARGET_ADDRESS_COST arm_address_cost
434 #undef TARGET_SHIFT_TRUNCATION_MASK
435 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
436 #undef TARGET_VECTOR_MODE_SUPPORTED_P
437 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
438 #undef TARGET_ARRAY_MODE_SUPPORTED_P
439 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
440 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
441 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
442 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
443 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
444 arm_autovectorize_vector_sizes
446 #undef TARGET_MACHINE_DEPENDENT_REORG
447 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
449 #undef TARGET_INIT_BUILTINS
450 #define TARGET_INIT_BUILTINS arm_init_builtins
451 #undef TARGET_EXPAND_BUILTIN
452 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
453 #undef TARGET_BUILTIN_DECL
454 #define TARGET_BUILTIN_DECL arm_builtin_decl
456 #undef TARGET_INIT_LIBFUNCS
457 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
459 #undef TARGET_PROMOTE_FUNCTION_MODE
460 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
461 #undef TARGET_PROMOTE_PROTOTYPES
462 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
463 #undef TARGET_PASS_BY_REFERENCE
464 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
465 #undef TARGET_ARG_PARTIAL_BYTES
466 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
467 #undef TARGET_FUNCTION_ARG
468 #define TARGET_FUNCTION_ARG arm_function_arg
469 #undef TARGET_FUNCTION_ARG_ADVANCE
470 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
471 #undef TARGET_FUNCTION_ARG_BOUNDARY
472 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
474 #undef TARGET_SETUP_INCOMING_VARARGS
475 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
477 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
478 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
480 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
481 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
482 #undef TARGET_TRAMPOLINE_INIT
483 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
484 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
485 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
487 #undef TARGET_WARN_FUNC_RETURN
488 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
490 #undef TARGET_DEFAULT_SHORT_ENUMS
491 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
493 #undef TARGET_ALIGN_ANON_BITFIELD
494 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
496 #undef TARGET_NARROW_VOLATILE_BITFIELD
497 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
499 #undef TARGET_CXX_GUARD_TYPE
500 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
502 #undef TARGET_CXX_GUARD_MASK_BIT
503 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
505 #undef TARGET_CXX_GET_COOKIE_SIZE
506 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
508 #undef TARGET_CXX_COOKIE_HAS_SIZE
509 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
511 #undef TARGET_CXX_CDTOR_RETURNS_THIS
512 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
514 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
515 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
517 #undef TARGET_CXX_USE_AEABI_ATEXIT
518 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
520 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
521 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
522 arm_cxx_determine_class_data_visibility
524 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
525 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
527 #undef TARGET_RETURN_IN_MSB
528 #define TARGET_RETURN_IN_MSB arm_return_in_msb
530 #undef TARGET_RETURN_IN_MEMORY
531 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
533 #undef TARGET_MUST_PASS_IN_STACK
534 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
536 #if ARM_UNWIND_INFO
537 #undef TARGET_ASM_UNWIND_EMIT
538 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
540 /* EABI unwinding tables use a different format for the typeinfo tables. */
541 #undef TARGET_ASM_TTYPE
542 #define TARGET_ASM_TTYPE arm_output_ttype
544 #undef TARGET_ARM_EABI_UNWINDER
545 #define TARGET_ARM_EABI_UNWINDER true
547 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
548 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
550 #undef TARGET_ASM_INIT_SECTIONS
551 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
552 #endif /* ARM_UNWIND_INFO */
554 #undef TARGET_DWARF_REGISTER_SPAN
555 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
557 #undef TARGET_CANNOT_COPY_INSN_P
558 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
560 #ifdef HAVE_AS_TLS
561 #undef TARGET_HAVE_TLS
562 #define TARGET_HAVE_TLS true
563 #endif
565 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
566 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
568 #undef TARGET_LEGITIMATE_CONSTANT_P
569 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
571 #undef TARGET_CANNOT_FORCE_CONST_MEM
572 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
574 #undef TARGET_MAX_ANCHOR_OFFSET
575 #define TARGET_MAX_ANCHOR_OFFSET 4095
577 /* The minimum is set such that the total size of the block
578 for a particular anchor is -4088 + 1 + 4095 bytes, which is
579 divisible by eight, ensuring natural spacing of anchors. */
580 #undef TARGET_MIN_ANCHOR_OFFSET
581 #define TARGET_MIN_ANCHOR_OFFSET -4088
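/* In other words, each anchor covers offsets in [-4088, 4095], a window
   of 4095 - (-4088) + 1 = 8184 bytes = 1023 * 8.  */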
583 #undef TARGET_SCHED_ISSUE_RATE
584 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
586 #undef TARGET_MANGLE_TYPE
587 #define TARGET_MANGLE_TYPE arm_mangle_type
589 #undef TARGET_BUILD_BUILTIN_VA_LIST
590 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
591 #undef TARGET_EXPAND_BUILTIN_VA_START
592 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
593 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
594 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
596 #ifdef HAVE_AS_TLS
597 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
598 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
599 #endif
601 #undef TARGET_LEGITIMATE_ADDRESS_P
602 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
604 #undef TARGET_PREFERRED_RELOAD_CLASS
605 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
607 #undef TARGET_INVALID_PARAMETER_TYPE
608 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
610 #undef TARGET_INVALID_RETURN_TYPE
611 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
613 #undef TARGET_PROMOTED_TYPE
614 #define TARGET_PROMOTED_TYPE arm_promoted_type
616 #undef TARGET_CONVERT_TO_TYPE
617 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
619 #undef TARGET_SCALAR_MODE_SUPPORTED_P
620 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
622 #undef TARGET_FRAME_POINTER_REQUIRED
623 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
625 #undef TARGET_CAN_ELIMINATE
626 #define TARGET_CAN_ELIMINATE arm_can_eliminate
628 #undef TARGET_CONDITIONAL_REGISTER_USAGE
629 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
631 #undef TARGET_CLASS_LIKELY_SPILLED_P
632 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
634 #undef TARGET_VECTORIZE_BUILTINS
635 #define TARGET_VECTORIZE_BUILTINS
637 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
638 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
639 arm_builtin_vectorized_function
641 #undef TARGET_VECTOR_ALIGNMENT
642 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
644 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
645 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
646 arm_vector_alignment_reachable
648 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
649 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
650 arm_builtin_support_vector_misalignment
652 #undef TARGET_PREFERRED_RENAME_CLASS
653 #define TARGET_PREFERRED_RENAME_CLASS \
654 arm_preferred_rename_class
656 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
657 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
658 arm_vectorize_vec_perm_const_ok
660 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
661 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
662 arm_builtin_vectorization_cost
663 #undef TARGET_VECTORIZE_ADD_STMT_COST
664 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
666 #undef TARGET_CANONICALIZE_COMPARISON
667 #define TARGET_CANONICALIZE_COMPARISON \
668 arm_canonicalize_comparison
670 #undef TARGET_ASAN_SHADOW_OFFSET
671 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
673 #undef MAX_INSN_PER_IT_BLOCK
674 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
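/* I.e. when arm_restrict_it is set (-mrestrict-it), an IT block may cover
   only a single conditional instruction; otherwise up to four.  */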
676 #undef TARGET_CAN_USE_DOLOOP_P
677 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
679 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
680 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
682 struct gcc_target targetm = TARGET_INITIALIZER;
684 /* Obstack for minipool constant handling. */
685 static struct obstack minipool_obstack;
686 static char * minipool_startobj;
688 /* The maximum number of insns skipped which
689 will be conditionalised if possible. */
690 static int max_insns_skipped = 5;
692 extern FILE * asm_out_file;
694 /* True if we are currently building a constant table. */
695 int making_const_table;
697 /* The processor for which instructions should be scheduled. */
698 enum processor_type arm_tune = arm_none;
700 /* The current tuning set. */
701 const struct tune_params *current_tune;
703 /* Which floating point hardware to schedule for. */
704 int arm_fpu_attr;
706 /* Which floating point hardware to use. */
707 const struct arm_fpu_desc *arm_fpu_desc;
709 /* Used for Thumb call_via trampolines. */
710 rtx thumb_call_via_label[14];
711 static int thumb_call_reg_needed;
713 /* Bit values used to identify processor capabilities. */
714 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
715 #define FL_ARCH3M (1 << 1) /* Extended multiply */
716 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
717 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
718 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
719 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
720 #define FL_THUMB (1 << 6) /* Thumb aware */
721 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
722 #define FL_STRONG (1 << 8) /* StrongARM */
723 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
724 #define FL_XSCALE (1 << 10) /* XScale */
725 /* spare (1 << 11) */
726 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
727 media instructions. */
728 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
729 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
730 Note: ARM6 & 7 derivatives only. */
731 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
732 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
733 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
734 profile. */
735 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
736 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
737 #define FL_NEON (1 << 20) /* Neon instructions. */
738 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
739 architecture. */
740 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
741 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
742 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
743 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
745 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
746 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
748 /* Flags that only affect tuning, not available instructions. */
749 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
750 | FL_CO_PROC)
752 #define FL_FOR_ARCH2 FL_NOTM
753 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
754 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
755 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
756 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
757 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
758 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
759 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
760 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
761 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
762 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
763 #define FL_FOR_ARCH6J FL_FOR_ARCH6
764 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
765 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
766 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
767 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
768 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
769 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
770 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
771 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
772 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
773 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
774 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
775 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
777 /* The bits in this mask specify which
778 instructions we are allowed to generate. */
779 static unsigned long insn_flags = 0;
781 /* The bits in this mask specify which instruction scheduling options should
782 be used. */
783 static unsigned long tune_flags = 0;
785 /* The highest ARM architecture version supported by the
786 target. */
787 enum base_architecture arm_base_arch = BASE_ARCH_0;
789 /* The following are used in the arm.md file as equivalents to bits
790 in the above two flag variables. */
792 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
793 int arm_arch3m = 0;
795 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
796 int arm_arch4 = 0;
798 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
799 int arm_arch4t = 0;
801 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
802 int arm_arch5 = 0;
804 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
805 int arm_arch5e = 0;
807 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
808 int arm_arch6 = 0;
810 /* Nonzero if this chip supports the ARM 6K extensions. */
811 int arm_arch6k = 0;
813 /* Nonzero if instructions present in ARMv6-M can be used. */
814 int arm_arch6m = 0;
816 /* Nonzero if this chip supports the ARM 7 extensions. */
817 int arm_arch7 = 0;
819 /* Nonzero if instructions not present in the 'M' profile can be used. */
820 int arm_arch_notm = 0;
822 /* Nonzero if instructions present in ARMv7E-M can be used. */
823 int arm_arch7em = 0;
825 /* Nonzero if instructions present in ARMv8 can be used. */
826 int arm_arch8 = 0;
828 /* Nonzero if this chip can benefit from load scheduling. */
829 int arm_ld_sched = 0;
831 /* Nonzero if this chip is a StrongARM. */
832 int arm_tune_strongarm = 0;
834 /* Nonzero if this chip supports Intel Wireless MMX technology. */
835 int arm_arch_iwmmxt = 0;
837 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
838 int arm_arch_iwmmxt2 = 0;
840 /* Nonzero if this chip is an XScale. */
841 int arm_arch_xscale = 0;
843 /* Nonzero if tuning for XScale. */
844 int arm_tune_xscale = 0;
846 /* Nonzero if we want to tune for stores that access the write-buffer.
847 This typically means an ARM6 or ARM7 with MMU or MPU. */
848 int arm_tune_wbuf = 0;
850 /* Nonzero if tuning for Cortex-A9. */
851 int arm_tune_cortex_a9 = 0;
853 /* Nonzero if generating Thumb instructions. */
854 int thumb_code = 0;
856 /* Nonzero if generating Thumb-1 instructions. */
857 int thumb1_code = 0;
859 /* Nonzero if we should define __THUMB_INTERWORK__ in the
860 preprocessor.
861 XXX This is a bit of a hack, it's intended to help work around
862 problems in GLD which doesn't understand that armv5t code is
863 interworking clean. */
864 int arm_cpp_interwork = 0;
866 /* Nonzero if chip supports Thumb 2. */
867 int arm_arch_thumb2;
869 /* Nonzero if chip supports integer division instruction. */
870 int arm_arch_arm_hwdiv;
871 int arm_arch_thumb_hwdiv;
873 /* Nonzero if we should use Neon to handle 64-bit operations rather
874 than core registers. */
875 int prefer_neon_for_64bits = 0;
877 /* Nonzero if we shouldn't use literal pools. */
878 bool arm_disable_literal_pool = false;
880 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
881 we must report the mode of the memory reference from
882 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
883 enum machine_mode output_memory_reference_mode;
885 /* The register number to be used for the PIC offset register. */
886 unsigned arm_pic_register = INVALID_REGNUM;
888 enum arm_pcs arm_pcs_default;
890 /* For an explanation of these variables, see final_prescan_insn below. */
891 int arm_ccfsm_state;
892 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
893 enum arm_cond_code arm_current_cc;
895 rtx arm_target_insn;
896 int arm_target_label;
897 /* The number of conditionally executed insns, including the current insn. */
898 int arm_condexec_count = 0;
899 /* A bitmask specifying the patterns for the IT block.
900 Zero means do not output an IT block before this insn. */
901 int arm_condexec_mask = 0;
902 /* The number of bits used in arm_condexec_mask. */
903 int arm_condexec_masklen = 0;
905 /* Nonzero if chip supports the ARMv8 CRC instructions. */
906 int arm_arch_crc = 0;
908 /* The condition codes of the ARM, and the inverse function. */
909 static const char * const arm_condition_codes[] =
910 {
911   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
912   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
913 };
915 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
916 int arm_regs_in_sequence[] =
917 {
918   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
919 };
921 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
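/* Unified assembler syntax spells the shift "lsl"; the older divided
   syntax used "asl".  */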
922 #define streq(string1, string2) (strcmp (string1, string2) == 0)
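/* Candidate work (scratch) registers for Thumb-2: the low registers
   r0-r7, excluding the hard frame pointer, stack pointer, program counter
   and PIC register.  */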
924 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
925 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
926 | (1 << PIC_OFFSET_TABLE_REGNUM)))
928 /* Initialization code. */
930 struct processors
931 {
932   const char *const name;
933   enum processor_type core;
934   const char *arch;
935   enum base_architecture base_arch;
936   const unsigned long flags;
937   const struct tune_params *const tune;
938 };
941 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
942 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
943 prefetch_slots, \
944 l1_size, \
945 l1_line_size
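/* These two macros expand to the prefetch-tuning fields of the tune_params
   structures below (number of prefetch slots, L1 cache size, L1 cache line
   size); the _NOT_BENEFICIAL variant fills them with 0, -1, -1.  */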
947 /* arm generic vectorizer costs. */
948 static const
949 struct cpu_vec_costs arm_default_vec_cost = {
950 1, /* scalar_stmt_cost. */
951 1, /* scalar load_cost. */
952 1, /* scalar_store_cost. */
953 1, /* vec_stmt_cost. */
954 1, /* vec_to_scalar_cost. */
955 1, /* scalar_to_vec_cost. */
956 1, /* vec_align_load_cost. */
957 1, /* vec_unalign_load_cost. */
958 1, /* vec_unalign_store_cost. */
959 1, /* vec_store_cost. */
960 3, /* cond_taken_branch_cost. */
961 1, /* cond_not_taken_branch_cost. */
962 };
964 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
965 #include "aarch-cost-tables.h"
969 const struct cpu_cost_table cortexa9_extra_costs =
971 /* ALU */
973 0, /* arith. */
974 0, /* logical. */
975 0, /* shift. */
976 COSTS_N_INSNS (1), /* shift_reg. */
977 COSTS_N_INSNS (1), /* arith_shift. */
978 COSTS_N_INSNS (2), /* arith_shift_reg. */
979 0, /* log_shift. */
980 COSTS_N_INSNS (1), /* log_shift_reg. */
981 COSTS_N_INSNS (1), /* extend. */
982 COSTS_N_INSNS (2), /* extend_arith. */
983 COSTS_N_INSNS (1), /* bfi. */
984 COSTS_N_INSNS (1), /* bfx. */
985 0, /* clz. */
986 0, /* rev. */
987 0, /* non_exec. */
988 true /* non_exec_costs_exec. */
991 /* MULT SImode */
993 COSTS_N_INSNS (3), /* simple. */
994 COSTS_N_INSNS (3), /* flag_setting. */
995 COSTS_N_INSNS (2), /* extend. */
996 COSTS_N_INSNS (3), /* add. */
997 COSTS_N_INSNS (2), /* extend_add. */
998 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1000 /* MULT DImode */
1002 0, /* simple (N/A). */
1003 0, /* flag_setting (N/A). */
1004 COSTS_N_INSNS (4), /* extend. */
1005 0, /* add (N/A). */
1006 COSTS_N_INSNS (4), /* extend_add. */
1007 0 /* idiv (N/A). */
1010 /* LD/ST */
1012 COSTS_N_INSNS (2), /* load. */
1013 COSTS_N_INSNS (2), /* load_sign_extend. */
1014 COSTS_N_INSNS (2), /* ldrd. */
1015 COSTS_N_INSNS (2), /* ldm_1st. */
1016 1, /* ldm_regs_per_insn_1st. */
1017 2, /* ldm_regs_per_insn_subsequent. */
1018 COSTS_N_INSNS (5), /* loadf. */
1019 COSTS_N_INSNS (5), /* loadd. */
1020 COSTS_N_INSNS (1), /* load_unaligned. */
1021 COSTS_N_INSNS (2), /* store. */
1022 COSTS_N_INSNS (2), /* strd. */
1023 COSTS_N_INSNS (2), /* stm_1st. */
1024 1, /* stm_regs_per_insn_1st. */
1025 2, /* stm_regs_per_insn_subsequent. */
1026 COSTS_N_INSNS (1), /* storef. */
1027 COSTS_N_INSNS (1), /* stored. */
1028 COSTS_N_INSNS (1) /* store_unaligned. */
1031 /* FP SFmode */
1033 COSTS_N_INSNS (14), /* div. */
1034 COSTS_N_INSNS (4), /* mult. */
1035 COSTS_N_INSNS (7), /* mult_addsub. */
1036 COSTS_N_INSNS (30), /* fma. */
1037 COSTS_N_INSNS (3), /* addsub. */
1038 COSTS_N_INSNS (1), /* fpconst. */
1039 COSTS_N_INSNS (1), /* neg. */
1040 COSTS_N_INSNS (3), /* compare. */
1041 COSTS_N_INSNS (3), /* widen. */
1042 COSTS_N_INSNS (3), /* narrow. */
1043 COSTS_N_INSNS (3), /* toint. */
1044 COSTS_N_INSNS (3), /* fromint. */
1045 COSTS_N_INSNS (3) /* roundint. */
1047 /* FP DFmode */
1049 COSTS_N_INSNS (24), /* div. */
1050 COSTS_N_INSNS (5), /* mult. */
1051 COSTS_N_INSNS (8), /* mult_addsub. */
1052 COSTS_N_INSNS (30), /* fma. */
1053 COSTS_N_INSNS (3), /* addsub. */
1054 COSTS_N_INSNS (1), /* fpconst. */
1055 COSTS_N_INSNS (1), /* neg. */
1056 COSTS_N_INSNS (3), /* compare. */
1057 COSTS_N_INSNS (3), /* widen. */
1058 COSTS_N_INSNS (3), /* narrow. */
1059 COSTS_N_INSNS (3), /* toint. */
1060 COSTS_N_INSNS (3), /* fromint. */
1061 COSTS_N_INSNS (3) /* roundint. */
1064 /* Vector */
1066 COSTS_N_INSNS (1) /* alu. */
1070 const struct cpu_cost_table cortexa8_extra_costs =
1072 /* ALU */
1074 0, /* arith. */
1075 0, /* logical. */
1076 COSTS_N_INSNS (1), /* shift. */
1077 0, /* shift_reg. */
1078 COSTS_N_INSNS (1), /* arith_shift. */
1079 0, /* arith_shift_reg. */
1080 COSTS_N_INSNS (1), /* log_shift. */
1081 0, /* log_shift_reg. */
1082 0, /* extend. */
1083 0, /* extend_arith. */
1084 0, /* bfi. */
1085 0, /* bfx. */
1086 0, /* clz. */
1087 0, /* rev. */
1088 0, /* non_exec. */
1089 true /* non_exec_costs_exec. */
1092 /* MULT SImode */
1094 COSTS_N_INSNS (1), /* simple. */
1095 COSTS_N_INSNS (1), /* flag_setting. */
1096 COSTS_N_INSNS (1), /* extend. */
1097 COSTS_N_INSNS (1), /* add. */
1098 COSTS_N_INSNS (1), /* extend_add. */
1099 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1101 /* MULT DImode */
1103 0, /* simple (N/A). */
1104 0, /* flag_setting (N/A). */
1105 COSTS_N_INSNS (2), /* extend. */
1106 0, /* add (N/A). */
1107 COSTS_N_INSNS (2), /* extend_add. */
1108 0 /* idiv (N/A). */
1111 /* LD/ST */
1113 COSTS_N_INSNS (1), /* load. */
1114 COSTS_N_INSNS (1), /* load_sign_extend. */
1115 COSTS_N_INSNS (1), /* ldrd. */
1116 COSTS_N_INSNS (1), /* ldm_1st. */
1117 1, /* ldm_regs_per_insn_1st. */
1118 2, /* ldm_regs_per_insn_subsequent. */
1119 COSTS_N_INSNS (1), /* loadf. */
1120 COSTS_N_INSNS (1), /* loadd. */
1121 COSTS_N_INSNS (1), /* load_unaligned. */
1122 COSTS_N_INSNS (1), /* store. */
1123 COSTS_N_INSNS (1), /* strd. */
1124 COSTS_N_INSNS (1), /* stm_1st. */
1125 1, /* stm_regs_per_insn_1st. */
1126 2, /* stm_regs_per_insn_subsequent. */
1127 COSTS_N_INSNS (1), /* storef. */
1128 COSTS_N_INSNS (1), /* stored. */
1129 COSTS_N_INSNS (1) /* store_unaligned. */
1132 /* FP SFmode */
1134 COSTS_N_INSNS (36), /* div. */
1135 COSTS_N_INSNS (11), /* mult. */
1136 COSTS_N_INSNS (20), /* mult_addsub. */
1137 COSTS_N_INSNS (30), /* fma. */
1138 COSTS_N_INSNS (9), /* addsub. */
1139 COSTS_N_INSNS (3), /* fpconst. */
1140 COSTS_N_INSNS (3), /* neg. */
1141 COSTS_N_INSNS (6), /* compare. */
1142 COSTS_N_INSNS (4), /* widen. */
1143 COSTS_N_INSNS (4), /* narrow. */
1144 COSTS_N_INSNS (8), /* toint. */
1145 COSTS_N_INSNS (8), /* fromint. */
1146 COSTS_N_INSNS (8) /* roundint. */
1148 /* FP DFmode */
1150 COSTS_N_INSNS (64), /* div. */
1151 COSTS_N_INSNS (16), /* mult. */
1152 COSTS_N_INSNS (25), /* mult_addsub. */
1153 COSTS_N_INSNS (30), /* fma. */
1154 COSTS_N_INSNS (9), /* addsub. */
1155 COSTS_N_INSNS (3), /* fpconst. */
1156 COSTS_N_INSNS (3), /* neg. */
1157 COSTS_N_INSNS (6), /* compare. */
1158 COSTS_N_INSNS (6), /* widen. */
1159 COSTS_N_INSNS (6), /* narrow. */
1160 COSTS_N_INSNS (8), /* toint. */
1161 COSTS_N_INSNS (8), /* fromint. */
1162 COSTS_N_INSNS (8) /* roundint. */
1165 /* Vector */
1167 COSTS_N_INSNS (1) /* alu. */
1173 const struct cpu_cost_table cortexa7_extra_costs =
1175 /* ALU */
1177 0, /* arith. */
1178 0, /* logical. */
1179 COSTS_N_INSNS (1), /* shift. */
1180 COSTS_N_INSNS (1), /* shift_reg. */
1181 COSTS_N_INSNS (1), /* arith_shift. */
1182 COSTS_N_INSNS (1), /* arith_shift_reg. */
1183 COSTS_N_INSNS (1), /* log_shift. */
1184 COSTS_N_INSNS (1), /* log_shift_reg. */
1185 COSTS_N_INSNS (1), /* extend. */
1186 COSTS_N_INSNS (1), /* extend_arith. */
1187 COSTS_N_INSNS (1), /* bfi. */
1188 COSTS_N_INSNS (1), /* bfx. */
1189 COSTS_N_INSNS (1), /* clz. */
1190 COSTS_N_INSNS (1), /* rev. */
1191 0, /* non_exec. */
1192 true /* non_exec_costs_exec. */
1196 /* MULT SImode */
1198 0, /* simple. */
1199 COSTS_N_INSNS (1), /* flag_setting. */
1200 COSTS_N_INSNS (1), /* extend. */
1201 COSTS_N_INSNS (1), /* add. */
1202 COSTS_N_INSNS (1), /* extend_add. */
1203 COSTS_N_INSNS (7) /* idiv. */
1205 /* MULT DImode */
1207 0, /* simple (N/A). */
1208 0, /* flag_setting (N/A). */
1209 COSTS_N_INSNS (1), /* extend. */
1210 0, /* add. */
1211 COSTS_N_INSNS (2), /* extend_add. */
1212 0 /* idiv (N/A). */
1215 /* LD/ST */
1217 COSTS_N_INSNS (1), /* load. */
1218 COSTS_N_INSNS (1), /* load_sign_extend. */
1219 COSTS_N_INSNS (3), /* ldrd. */
1220 COSTS_N_INSNS (1), /* ldm_1st. */
1221 1, /* ldm_regs_per_insn_1st. */
1222 2, /* ldm_regs_per_insn_subsequent. */
1223 COSTS_N_INSNS (2), /* loadf. */
1224 COSTS_N_INSNS (2), /* loadd. */
1225 COSTS_N_INSNS (1), /* load_unaligned. */
1226 COSTS_N_INSNS (1), /* store. */
1227 COSTS_N_INSNS (3), /* strd. */
1228 COSTS_N_INSNS (1), /* stm_1st. */
1229 1, /* stm_regs_per_insn_1st. */
1230 2, /* stm_regs_per_insn_subsequent. */
1231 COSTS_N_INSNS (2), /* storef. */
1232 COSTS_N_INSNS (2), /* stored. */
1233 COSTS_N_INSNS (1) /* store_unaligned. */
1236 /* FP SFmode */
1238 COSTS_N_INSNS (15), /* div. */
1239 COSTS_N_INSNS (3), /* mult. */
1240 COSTS_N_INSNS (7), /* mult_addsub. */
1241 COSTS_N_INSNS (7), /* fma. */
1242 COSTS_N_INSNS (3), /* addsub. */
1243 COSTS_N_INSNS (3), /* fpconst. */
1244 COSTS_N_INSNS (3), /* neg. */
1245 COSTS_N_INSNS (3), /* compare. */
1246 COSTS_N_INSNS (3), /* widen. */
1247 COSTS_N_INSNS (3), /* narrow. */
1248 COSTS_N_INSNS (3), /* toint. */
1249 COSTS_N_INSNS (3), /* fromint. */
1250 COSTS_N_INSNS (3) /* roundint. */
1252 /* FP DFmode */
1254 COSTS_N_INSNS (30), /* div. */
1255 COSTS_N_INSNS (6), /* mult. */
1256 COSTS_N_INSNS (10), /* mult_addsub. */
1257 COSTS_N_INSNS (7), /* fma. */
1258 COSTS_N_INSNS (3), /* addsub. */
1259 COSTS_N_INSNS (3), /* fpconst. */
1260 COSTS_N_INSNS (3), /* neg. */
1261 COSTS_N_INSNS (3), /* compare. */
1262 COSTS_N_INSNS (3), /* widen. */
1263 COSTS_N_INSNS (3), /* narrow. */
1264 COSTS_N_INSNS (3), /* toint. */
1265 COSTS_N_INSNS (3), /* fromint. */
1266 COSTS_N_INSNS (3) /* roundint. */
1269 /* Vector */
1271 COSTS_N_INSNS (1) /* alu. */
1275 const struct cpu_cost_table cortexa12_extra_costs =
1277 /* ALU */
1279 0, /* arith. */
1280 0, /* logical. */
1281 0, /* shift. */
1282 COSTS_N_INSNS (1), /* shift_reg. */
1283 COSTS_N_INSNS (1), /* arith_shift. */
1284 COSTS_N_INSNS (1), /* arith_shift_reg. */
1285 COSTS_N_INSNS (1), /* log_shift. */
1286 COSTS_N_INSNS (1), /* log_shift_reg. */
1287 0, /* extend. */
1288 COSTS_N_INSNS (1), /* extend_arith. */
1289 0, /* bfi. */
1290 COSTS_N_INSNS (1), /* bfx. */
1291 COSTS_N_INSNS (1), /* clz. */
1292 COSTS_N_INSNS (1), /* rev. */
1293 0, /* non_exec. */
1294 true /* non_exec_costs_exec. */
1296 /* MULT SImode */
1299 COSTS_N_INSNS (2), /* simple. */
1300 COSTS_N_INSNS (3), /* flag_setting. */
1301 COSTS_N_INSNS (2), /* extend. */
1302 COSTS_N_INSNS (3), /* add. */
1303 COSTS_N_INSNS (2), /* extend_add. */
1304 COSTS_N_INSNS (18) /* idiv. */
1306 /* MULT DImode */
1308 0, /* simple (N/A). */
1309 0, /* flag_setting (N/A). */
1310 COSTS_N_INSNS (3), /* extend. */
1311 0, /* add (N/A). */
1312 COSTS_N_INSNS (3), /* extend_add. */
1313 0 /* idiv (N/A). */
1316 /* LD/ST */
1318 COSTS_N_INSNS (3), /* load. */
1319 COSTS_N_INSNS (3), /* load_sign_extend. */
1320 COSTS_N_INSNS (3), /* ldrd. */
1321 COSTS_N_INSNS (3), /* ldm_1st. */
1322 1, /* ldm_regs_per_insn_1st. */
1323 2, /* ldm_regs_per_insn_subsequent. */
1324 COSTS_N_INSNS (3), /* loadf. */
1325 COSTS_N_INSNS (3), /* loadd. */
1326 0, /* load_unaligned. */
1327 0, /* store. */
1328 0, /* strd. */
1329 0, /* stm_1st. */
1330 1, /* stm_regs_per_insn_1st. */
1331 2, /* stm_regs_per_insn_subsequent. */
1332 COSTS_N_INSNS (2), /* storef. */
1333 COSTS_N_INSNS (2), /* stored. */
1334 0 /* store_unaligned. */
1337 /* FP SFmode */
1339 COSTS_N_INSNS (17), /* div. */
1340 COSTS_N_INSNS (4), /* mult. */
1341 COSTS_N_INSNS (8), /* mult_addsub. */
1342 COSTS_N_INSNS (8), /* fma. */
1343 COSTS_N_INSNS (4), /* addsub. */
1344 COSTS_N_INSNS (2), /* fpconst. */
1345 COSTS_N_INSNS (2), /* neg. */
1346 COSTS_N_INSNS (2), /* compare. */
1347 COSTS_N_INSNS (4), /* widen. */
1348 COSTS_N_INSNS (4), /* narrow. */
1349 COSTS_N_INSNS (4), /* toint. */
1350 COSTS_N_INSNS (4), /* fromint. */
1351 COSTS_N_INSNS (4) /* roundint. */
1353 /* FP DFmode */
1355 COSTS_N_INSNS (31), /* div. */
1356 COSTS_N_INSNS (4), /* mult. */
1357 COSTS_N_INSNS (8), /* mult_addsub. */
1358 COSTS_N_INSNS (8), /* fma. */
1359 COSTS_N_INSNS (4), /* addsub. */
1360 COSTS_N_INSNS (2), /* fpconst. */
1361 COSTS_N_INSNS (2), /* neg. */
1362 COSTS_N_INSNS (2), /* compare. */
1363 COSTS_N_INSNS (4), /* widen. */
1364 COSTS_N_INSNS (4), /* narrow. */
1365 COSTS_N_INSNS (4), /* toint. */
1366 COSTS_N_INSNS (4), /* fromint. */
1367 COSTS_N_INSNS (4) /* roundint. */
1370 /* Vector */
1372 COSTS_N_INSNS (1) /* alu. */
1376 const struct cpu_cost_table cortexa15_extra_costs =
1378 /* ALU */
1380 0, /* arith. */
1381 0, /* logical. */
1382 0, /* shift. */
1383 0, /* shift_reg. */
1384 COSTS_N_INSNS (1), /* arith_shift. */
1385 COSTS_N_INSNS (1), /* arith_shift_reg. */
1386 COSTS_N_INSNS (1), /* log_shift. */
1387 COSTS_N_INSNS (1), /* log_shift_reg. */
1388 0, /* extend. */
1389 COSTS_N_INSNS (1), /* extend_arith. */
1390 COSTS_N_INSNS (1), /* bfi. */
1391 0, /* bfx. */
1392 0, /* clz. */
1393 0, /* rev. */
1394 0, /* non_exec. */
1395 true /* non_exec_costs_exec. */
1397 /* MULT SImode */
1400 COSTS_N_INSNS (2), /* simple. */
1401 COSTS_N_INSNS (3), /* flag_setting. */
1402 COSTS_N_INSNS (2), /* extend. */
1403 COSTS_N_INSNS (2), /* add. */
1404 COSTS_N_INSNS (2), /* extend_add. */
1405 COSTS_N_INSNS (18) /* idiv. */
1407 /* MULT DImode */
1409 0, /* simple (N/A). */
1410 0, /* flag_setting (N/A). */
1411 COSTS_N_INSNS (3), /* extend. */
1412 0, /* add (N/A). */
1413 COSTS_N_INSNS (3), /* extend_add. */
1414 0 /* idiv (N/A). */
1417 /* LD/ST */
1419 COSTS_N_INSNS (3), /* load. */
1420 COSTS_N_INSNS (3), /* load_sign_extend. */
1421 COSTS_N_INSNS (3), /* ldrd. */
1422 COSTS_N_INSNS (4), /* ldm_1st. */
1423 1, /* ldm_regs_per_insn_1st. */
1424 2, /* ldm_regs_per_insn_subsequent. */
1425 COSTS_N_INSNS (4), /* loadf. */
1426 COSTS_N_INSNS (4), /* loadd. */
1427 0, /* load_unaligned. */
1428 0, /* store. */
1429 0, /* strd. */
1430 COSTS_N_INSNS (1), /* stm_1st. */
1431 1, /* stm_regs_per_insn_1st. */
1432 2, /* stm_regs_per_insn_subsequent. */
1433 0, /* storef. */
1434 0, /* stored. */
1435 0 /* store_unaligned. */
1438 /* FP SFmode */
1440 COSTS_N_INSNS (17), /* div. */
1441 COSTS_N_INSNS (4), /* mult. */
1442 COSTS_N_INSNS (8), /* mult_addsub. */
1443 COSTS_N_INSNS (8), /* fma. */
1444 COSTS_N_INSNS (4), /* addsub. */
1445 COSTS_N_INSNS (2), /* fpconst. */
1446 COSTS_N_INSNS (2), /* neg. */
1447 COSTS_N_INSNS (5), /* compare. */
1448 COSTS_N_INSNS (4), /* widen. */
1449 COSTS_N_INSNS (4), /* narrow. */
1450 COSTS_N_INSNS (4), /* toint. */
1451 COSTS_N_INSNS (4), /* fromint. */
1452 COSTS_N_INSNS (4) /* roundint. */
1454 /* FP DFmode */
1456 COSTS_N_INSNS (31), /* div. */
1457 COSTS_N_INSNS (4), /* mult. */
1458 COSTS_N_INSNS (8), /* mult_addsub. */
1459 COSTS_N_INSNS (8), /* fma. */
1460 COSTS_N_INSNS (4), /* addsub. */
1461 COSTS_N_INSNS (2), /* fpconst. */
1462 COSTS_N_INSNS (2), /* neg. */
1463 COSTS_N_INSNS (2), /* compare. */
1464 COSTS_N_INSNS (4), /* widen. */
1465 COSTS_N_INSNS (4), /* narrow. */
1466 COSTS_N_INSNS (4), /* toint. */
1467 COSTS_N_INSNS (4), /* fromint. */
1468 COSTS_N_INSNS (4) /* roundint. */
1471 /* Vector */
1473 COSTS_N_INSNS (1) /* alu. */
1477 const struct cpu_cost_table v7m_extra_costs =
1479 /* ALU */
1481 0, /* arith. */
1482 0, /* logical. */
1483 0, /* shift. */
1484 0, /* shift_reg. */
1485 0, /* arith_shift. */
1486 COSTS_N_INSNS (1), /* arith_shift_reg. */
1487 0, /* log_shift. */
1488 COSTS_N_INSNS (1), /* log_shift_reg. */
1489 0, /* extend. */
1490 COSTS_N_INSNS (1), /* extend_arith. */
1491 0, /* bfi. */
1492 0, /* bfx. */
1493 0, /* clz. */
1494 0, /* rev. */
1495 COSTS_N_INSNS (1), /* non_exec. */
1496 false /* non_exec_costs_exec. */
1499 /* MULT SImode */
1501 COSTS_N_INSNS (1), /* simple. */
1502 COSTS_N_INSNS (1), /* flag_setting. */
1503 COSTS_N_INSNS (2), /* extend. */
1504 COSTS_N_INSNS (1), /* add. */
1505 COSTS_N_INSNS (3), /* extend_add. */
1506 COSTS_N_INSNS (8) /* idiv. */
1508 /* MULT DImode */
1510 0, /* simple (N/A). */
1511 0, /* flag_setting (N/A). */
1512 COSTS_N_INSNS (2), /* extend. */
1513 0, /* add (N/A). */
1514 COSTS_N_INSNS (3), /* extend_add. */
1515 0 /* idiv (N/A). */
1518 /* LD/ST */
1520 COSTS_N_INSNS (2), /* load. */
1521 0, /* load_sign_extend. */
1522 COSTS_N_INSNS (3), /* ldrd. */
1523 COSTS_N_INSNS (2), /* ldm_1st. */
1524 1, /* ldm_regs_per_insn_1st. */
1525 1, /* ldm_regs_per_insn_subsequent. */
1526 COSTS_N_INSNS (2), /* loadf. */
1527 COSTS_N_INSNS (3), /* loadd. */
1528 COSTS_N_INSNS (1), /* load_unaligned. */
1529 COSTS_N_INSNS (2), /* store. */
1530 COSTS_N_INSNS (3), /* strd. */
1531 COSTS_N_INSNS (2), /* stm_1st. */
1532 1, /* stm_regs_per_insn_1st. */
1533 1, /* stm_regs_per_insn_subsequent. */
1534 COSTS_N_INSNS (2), /* storef. */
1535 COSTS_N_INSNS (3), /* stored. */
1536 COSTS_N_INSNS (1) /* store_unaligned. */
1539 /* FP SFmode */
1541 COSTS_N_INSNS (7), /* div. */
1542 COSTS_N_INSNS (2), /* mult. */
1543 COSTS_N_INSNS (5), /* mult_addsub. */
1544 COSTS_N_INSNS (3), /* fma. */
1545 COSTS_N_INSNS (1), /* addsub. */
1546 0, /* fpconst. */
1547 0, /* neg. */
1548 0, /* compare. */
1549 0, /* widen. */
1550 0, /* narrow. */
1551 0, /* toint. */
1552 0, /* fromint. */
1553 0 /* roundint. */
1555 /* FP DFmode */
1557 COSTS_N_INSNS (15), /* div. */
1558 COSTS_N_INSNS (5), /* mult. */
1559 COSTS_N_INSNS (7), /* mult_addsub. */
1560 COSTS_N_INSNS (7), /* fma. */
1561 COSTS_N_INSNS (3), /* addsub. */
1562 0, /* fpconst. */
1563 0, /* neg. */
1564 0, /* compare. */
1565 0, /* widen. */
1566 0, /* narrow. */
1567 0, /* toint. */
1568 0, /* fromint. */
1569 0 /* roundint. */
1572 /* Vector */
1574 COSTS_N_INSNS (1) /* alu. */
1578 const struct tune_params arm_slowmul_tune =
1580 arm_slowmul_rtx_costs,
1581 NULL,
1582 NULL, /* Sched adj cost. */
1583 3, /* Constant limit. */
1584 5, /* Max cond insns. */
1585 ARM_PREFETCH_NOT_BENEFICIAL,
1586 true, /* Prefer constant pool. */
1587 arm_default_branch_cost,
1588 false, /* Prefer LDRD/STRD. */
1589 {true, true}, /* Prefer non short circuit. */
1590 &arm_default_vec_cost, /* Vectorizer costs. */
1591 false, /* Prefer Neon for 64-bit bitops. */
1592 false, false /* Prefer 32-bit encodings. */
1595 const struct tune_params arm_fastmul_tune =
1597 arm_fastmul_rtx_costs,
1598 NULL,
1599 NULL, /* Sched adj cost. */
1600 1, /* Constant limit. */
1601 5, /* Max cond insns. */
1602 ARM_PREFETCH_NOT_BENEFICIAL,
1603 true, /* Prefer constant pool. */
1604 arm_default_branch_cost,
1605 false, /* Prefer LDRD/STRD. */
1606 {true, true}, /* Prefer non short circuit. */
1607 &arm_default_vec_cost, /* Vectorizer costs. */
1608 false, /* Prefer Neon for 64-bit bitops. */
1609 false, false /* Prefer 32-bit encodings. */
1612 /* StrongARM has early execution of branches, so a sequence that is worth
1613 skipping is shorter. Set max_insns_skipped to a lower value. */
1615 const struct tune_params arm_strongarm_tune =
1617 arm_fastmul_rtx_costs,
1618 NULL,
1619 NULL, /* Sched adj cost. */
1620 1, /* Constant limit. */
1621 3, /* Max cond insns. */
1622 ARM_PREFETCH_NOT_BENEFICIAL,
1623 true, /* Prefer constant pool. */
1624 arm_default_branch_cost,
1625 false, /* Prefer LDRD/STRD. */
1626 {true, true}, /* Prefer non short circuit. */
1627 &arm_default_vec_cost, /* Vectorizer costs. */
1628 false, /* Prefer Neon for 64-bit bitops. */
1629 false, false /* Prefer 32-bit encodings. */
1632 const struct tune_params arm_xscale_tune =
1634 arm_xscale_rtx_costs,
1635 NULL,
1636 xscale_sched_adjust_cost,
1637 2, /* Constant limit. */
1638 3, /* Max cond insns. */
1639 ARM_PREFETCH_NOT_BENEFICIAL,
1640 true, /* Prefer constant pool. */
1641 arm_default_branch_cost,
1642 false, /* Prefer LDRD/STRD. */
1643 {true, true}, /* Prefer non short circuit. */
1644 &arm_default_vec_cost, /* Vectorizer costs. */
1645 false, /* Prefer Neon for 64-bit bitops. */
1646 false, false /* Prefer 32-bit encodings. */
1649 const struct tune_params arm_9e_tune =
1651 arm_9e_rtx_costs,
1652 NULL,
1653 NULL, /* Sched adj cost. */
1654 1, /* Constant limit. */
1655 5, /* Max cond insns. */
1656 ARM_PREFETCH_NOT_BENEFICIAL,
1657 true, /* Prefer constant pool. */
1658 arm_default_branch_cost,
1659 false, /* Prefer LDRD/STRD. */
1660 {true, true}, /* Prefer non short circuit. */
1661 &arm_default_vec_cost, /* Vectorizer costs. */
1662 false, /* Prefer Neon for 64-bits bitops. */
1663 false, false /* Prefer 32-bit encodings. */
1666 const struct tune_params arm_v6t2_tune =
1668 arm_9e_rtx_costs,
1669 NULL,
1670 NULL, /* Sched adj cost. */
1671 1, /* Constant limit. */
1672 5, /* Max cond insns. */
1673 ARM_PREFETCH_NOT_BENEFICIAL,
1674 false, /* Prefer constant pool. */
1675 arm_default_branch_cost,
1676 false, /* Prefer LDRD/STRD. */
1677 {true, true}, /* Prefer non short circuit. */
1678 &arm_default_vec_cost, /* Vectorizer costs. */
1679 false, /* Prefer Neon for 64-bits bitops. */
1680 false, false /* Prefer 32-bit encodings. */
1683 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1684 const struct tune_params arm_cortex_tune =
1686 arm_9e_rtx_costs,
1687 &generic_extra_costs,
1688 NULL, /* Sched adj cost. */
1689 1, /* Constant limit. */
1690 5, /* Max cond insns. */
1691 ARM_PREFETCH_NOT_BENEFICIAL,
1692 false, /* Prefer constant pool. */
1693 arm_default_branch_cost,
1694 false, /* Prefer LDRD/STRD. */
1695 {true, true}, /* Prefer non short circuit. */
1696 &arm_default_vec_cost, /* Vectorizer costs. */
1697 false, /* Prefer Neon for 64-bits bitops. */
1698 false, false /* Prefer 32-bit encodings. */
1701 const struct tune_params arm_cortex_a8_tune =
1703 arm_9e_rtx_costs,
1704 &cortexa8_extra_costs,
1705 NULL, /* Sched adj cost. */
1706 1, /* Constant limit. */
1707 5, /* Max cond insns. */
1708 ARM_PREFETCH_NOT_BENEFICIAL,
1709 false, /* Prefer constant pool. */
1710 arm_default_branch_cost,
1711 false, /* Prefer LDRD/STRD. */
1712 {true, true}, /* Prefer non short circuit. */
1713 &arm_default_vec_cost, /* Vectorizer costs. */
1714 false, /* Prefer Neon for 64-bits bitops. */
1715 false, false /* Prefer 32-bit encodings. */
1718 const struct tune_params arm_cortex_a7_tune =
1720 arm_9e_rtx_costs,
1721 &cortexa7_extra_costs,
1722 NULL,
1723 1, /* Constant limit. */
1724 5, /* Max cond insns. */
1725 ARM_PREFETCH_NOT_BENEFICIAL,
1726 false, /* Prefer constant pool. */
1727 arm_default_branch_cost,
1728 false, /* Prefer LDRD/STRD. */
1729 {true, true}, /* Prefer non short circuit. */
1730 &arm_default_vec_cost, /* Vectorizer costs. */
1731 false, /* Prefer Neon for 64-bits bitops. */
1732 false, false /* Prefer 32-bit encodings. */
1735 const struct tune_params arm_cortex_a15_tune =
1737 arm_9e_rtx_costs,
1738 &cortexa15_extra_costs,
1739 NULL, /* Sched adj cost. */
1740 1, /* Constant limit. */
1741 2, /* Max cond insns. */
1742 ARM_PREFETCH_NOT_BENEFICIAL,
1743 false, /* Prefer constant pool. */
1744 arm_default_branch_cost,
1745 true, /* Prefer LDRD/STRD. */
1746 {true, true}, /* Prefer non short circuit. */
1747 &arm_default_vec_cost, /* Vectorizer costs. */
1748 false, /* Prefer Neon for 64-bits bitops. */
1749 true, true /* Prefer 32-bit encodings. */
1752 const struct tune_params arm_cortex_a53_tune =
1754 arm_9e_rtx_costs,
1755 &cortexa53_extra_costs,
1756 NULL, /* Scheduler cost adjustment. */
1757 1, /* Constant limit. */
1758 5, /* Max cond insns. */
1759 ARM_PREFETCH_NOT_BENEFICIAL,
1760 false, /* Prefer constant pool. */
1761 arm_default_branch_cost,
1762 false, /* Prefer LDRD/STRD. */
1763 {true, true}, /* Prefer non short circuit. */
1764 &arm_default_vec_cost, /* Vectorizer costs. */
1765 false, /* Prefer Neon for 64-bits bitops. */
1766 false, false /* Prefer 32-bit encodings. */
1769 const struct tune_params arm_cortex_a57_tune =
1771 arm_9e_rtx_costs,
1772 &cortexa57_extra_costs,
1773 NULL, /* Scheduler cost adjustment. */
1774 1, /* Constant limit. */
1775 2, /* Max cond insns. */
1776 ARM_PREFETCH_NOT_BENEFICIAL,
1777 false, /* Prefer constant pool. */
1778 arm_default_branch_cost,
1779 true, /* Prefer LDRD/STRD. */
1780 {true, true}, /* Prefer non short circuit. */
1781 &arm_default_vec_cost, /* Vectorizer costs. */
1782 false, /* Prefer Neon for 64-bits bitops. */
1783 true, true /* Prefer 32-bit encodings. */
1786 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1787 less appealing. Set max_insns_skipped to a low value. */
1789 const struct tune_params arm_cortex_a5_tune =
1791 arm_9e_rtx_costs,
1792 NULL,
1793 NULL, /* Sched adj cost. */
1794 1, /* Constant limit. */
1795 1, /* Max cond insns. */
1796 ARM_PREFETCH_NOT_BENEFICIAL,
1797 false, /* Prefer constant pool. */
1798 arm_cortex_a5_branch_cost,
1799 false, /* Prefer LDRD/STRD. */
1800 {false, false}, /* Prefer non short circuit. */
1801 &arm_default_vec_cost, /* Vectorizer costs. */
1802 false, /* Prefer Neon for 64-bits bitops. */
1803 false, false /* Prefer 32-bit encodings. */
1806 const struct tune_params arm_cortex_a9_tune =
1808 arm_9e_rtx_costs,
1809 &cortexa9_extra_costs,
1810 cortex_a9_sched_adjust_cost,
1811 1, /* Constant limit. */
1812 5, /* Max cond insns. */
1813 ARM_PREFETCH_BENEFICIAL(4,32,32),
1814 false, /* Prefer constant pool. */
1815 arm_default_branch_cost,
1816 false, /* Prefer LDRD/STRD. */
1817 {true, true}, /* Prefer non short circuit. */
1818 &arm_default_vec_cost, /* Vectorizer costs. */
1819 false, /* Prefer Neon for 64-bits bitops. */
1820 false, false /* Prefer 32-bit encodings. */
1823 const struct tune_params arm_cortex_a12_tune =
1825 arm_9e_rtx_costs,
1826 &cortexa12_extra_costs,
1827 NULL,
1828 1, /* Constant limit. */
1829 5, /* Max cond insns. */
1830 ARM_PREFETCH_BENEFICIAL(4,32,32),
1831 false, /* Prefer constant pool. */
1832 arm_default_branch_cost,
1833 true, /* Prefer LDRD/STRD. */
1834 {true, true}, /* Prefer non short circuit. */
1835 &arm_default_vec_cost, /* Vectorizer costs. */
1836 false, /* Prefer Neon for 64-bits bitops. */
1837 false, false /* Prefer 32-bit encodings. */
1840 /* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT each take a single
1841 cycle to execute, so the pair costs two cycles.  An LDR from the constant pool
1842 likewise takes two cycles to execute, but mildly increases pipelining opportunity (consecutive
1843 loads/stores can be pipelined together, saving one cycle), and may also
1844 improve icache utilisation. Hence we prefer the constant pool for such
1845 processors. */
1847 const struct tune_params arm_v7m_tune =
1849 arm_9e_rtx_costs,
1850 &v7m_extra_costs,
1851 NULL, /* Sched adj cost. */
1852 1, /* Constant limit. */
1853 2, /* Max cond insns. */
1854 ARM_PREFETCH_NOT_BENEFICIAL,
1855 true, /* Prefer constant pool. */
1856 arm_cortex_m_branch_cost,
1857 false, /* Prefer LDRD/STRD. */
1858 {false, false}, /* Prefer non short circuit. */
1859 &arm_default_vec_cost, /* Vectorizer costs. */
1860 false, /* Prefer Neon for 64-bits bitops. */
1861 false, false /* Prefer 32-bit encodings. */
1864 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1865 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1866 const struct tune_params arm_v6m_tune =
1868 arm_9e_rtx_costs,
1869 NULL,
1870 NULL, /* Sched adj cost. */
1871 1, /* Constant limit. */
1872 5, /* Max cond insns. */
1873 ARM_PREFETCH_NOT_BENEFICIAL,
1874 false, /* Prefer constant pool. */
1875 arm_default_branch_cost,
1876 false, /* Prefer LDRD/STRD. */
1877 {false, false}, /* Prefer non short circuit. */
1878 &arm_default_vec_cost, /* Vectorizer costs. */
1879 false, /* Prefer Neon for 64-bits bitops. */
1880 false, false /* Prefer 32-bit encodings. */
1883 const struct tune_params arm_fa726te_tune =
1885 arm_9e_rtx_costs,
1886 NULL,
1887 fa726te_sched_adjust_cost,
1888 1, /* Constant limit. */
1889 5, /* Max cond insns. */
1890 ARM_PREFETCH_NOT_BENEFICIAL,
1891 true, /* Prefer constant pool. */
1892 arm_default_branch_cost,
1893 false, /* Prefer LDRD/STRD. */
1894 {true, true}, /* Prefer non short circuit. */
1895 &arm_default_vec_cost, /* Vectorizer costs. */
1896 false, /* Prefer Neon for 64-bits bitops. */
1897 false, false /* Prefer 32-bit encodings. */
1901 /* Not all of these give usefully different compilation alternatives,
1902 but there is no simple way of generalizing them. */
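/* For illustration (the entry and flags shown here are hypothetical),
   an arm-cores.def line such as

     ARM_CORE ("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9)

   expands via the ARM_CORE macro below to

     {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},

   so the COSTS argument selects one of the tune_params structures
   defined above.  */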
1903 static const struct processors all_cores[] =
1905 /* ARM Cores */
1906 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1907 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1908 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1909 #include "arm-cores.def"
1910 #undef ARM_CORE
1911 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1914 static const struct processors all_architectures[] =
1916 /* ARM Architectures */
1917 /* We don't specify tuning costs here as they will be figured out
1918 from the core. */
1920 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1921 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1922 #include "arm-arches.def"
1923 #undef ARM_ARCH
1924 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1928 /* These are populated as command-line arguments are processed, or NULL
1929 if not specified. */
1930 static const struct processors *arm_selected_arch;
1931 static const struct processors *arm_selected_cpu;
1932 static const struct processors *arm_selected_tune;
1934 /* The name of the preprocessor macro to define for this architecture. */
1936 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1938 /* Available values for -mfpu=. */
1940 static const struct arm_fpu_desc all_fpus[] =
1942 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1943 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1944 #include "arm-fpus.def"
1945 #undef ARM_FPU
1949 /* Supported TLS relocations. */
1951 enum tls_reloc {
1952 TLS_GD32,
1953 TLS_LDM32,
1954 TLS_LDO32,
1955 TLS_IE32,
1956 TLS_LE32,
1957 TLS_DESCSEQ /* GNU scheme */
1960 /* The maximum number of insns to be used when loading a constant. */
1961 inline static int
1962 arm_constant_limit (bool size_p)
1964 return size_p ? 1 : current_tune->constant_limit;
1967 /* Emit an insn that's a simple single-set. Both the operands must be known
1968 to be valid. */
1969 inline static rtx
1970 emit_set_insn (rtx x, rtx y)
1972 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1975 /* Return the number of bits set in VALUE. */
1976 static unsigned
1977 bit_count (unsigned long value)
1979 unsigned long count = 0;
1981 while (value)
1983 count++;
1984 value &= value - 1; /* Clear the least-significant set bit. */
1987 return count;
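/* The loop above relies on the value &= value - 1 idiom, which clears
   exactly one set bit per iteration; for value == 0x28 (two bits set)
   it runs twice and returns 2.  */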
1990 typedef struct
1992 enum machine_mode mode;
1993 const char *name;
1994 } arm_fixed_mode_set;
1996 /* A small helper for setting fixed-point libfuncs.  */
1998 static void
1999 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
2000 const char *funcname, const char *modename,
2001 int num_suffix)
2003 char buffer[50];
2005 if (num_suffix == 0)
2006 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2007 else
2008 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2010 set_optab_libfunc (optable, mode, buffer);
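/* For illustration, with the format string above a call such as
   arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd", "sa", 3)
   registers the libcall name "__gnu_ssaddsa3"; a num_suffix of 0 omits
   the trailing digit.  */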
2013 static void
2014 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
2015 enum machine_mode from, const char *funcname,
2016 const char *toname, const char *fromname)
2018 char buffer[50];
2019 const char *maybe_suffix_2 = "";
2021 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2022 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2023 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2024 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2025 maybe_suffix_2 = "2";
2027 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2028 maybe_suffix_2);
2030 set_conv_libfunc (optable, to, from, buffer);
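/* For illustration: a conversion between two signed fract modes, say
   QQmode to HQmode, yields "__gnu_fractqqhq2" (the "2" suffix applies
   because both modes are fract and share signedness), while a
   conversion involving a non-fixed-point mode, say SAmode to DFmode,
   yields "__gnu_fractsadf" with no suffix.  */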
2033 /* Set up library functions unique to ARM. */
2035 static void
2036 arm_init_libfuncs (void)
2038 /* For Linux, we have access to kernel support for atomic operations. */
2039 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2040 init_sync_libfuncs (2 * UNITS_PER_WORD);
2042 /* There are no special library functions unless we are using the
2043 ARM BPABI. */
2044 if (!TARGET_BPABI)
2045 return;
2047 /* The functions below are described in Section 4 of the "Run-Time
2048 ABI for the ARM architecture", Version 1.0. */
2050 /* Double-precision floating-point arithmetic. Table 2. */
2051 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2052 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2053 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2054 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2055 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2057 /* Double-precision comparisons. Table 3. */
2058 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2059 set_optab_libfunc (ne_optab, DFmode, NULL);
2060 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2061 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2062 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2063 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2064 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2066 /* Single-precision floating-point arithmetic. Table 4. */
2067 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2068 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2069 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2070 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2071 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2073 /* Single-precision comparisons. Table 5. */
2074 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2075 set_optab_libfunc (ne_optab, SFmode, NULL);
2076 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2077 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2078 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2079 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2080 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2082 /* Floating-point to integer conversions. Table 6. */
2083 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2084 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2085 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2086 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2087 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2088 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2089 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2090 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2092 /* Conversions between floating types. Table 7. */
2093 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2094 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2096 /* Integer to floating-point conversions. Table 8. */
2097 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2098 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2099 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2100 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2101 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2102 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2103 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2104 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2106 /* Long long. Table 9. */
2107 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2108 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2109 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2110 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2111 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2112 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2113 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2114 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2116 /* Integer (32/32->32) division. \S 4.3.1. */
2117 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2118 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2120 /* The divmod functions are designed so that they can be used for
2121 plain division, even though they return both the quotient and the
2122 remainder. The quotient is returned in the usual location (i.e.,
2123 r0 for SImode, {r0, r1} for DImode), just as would be expected
2124 for an ordinary division routine. Because the AAPCS calling
2125 conventions specify that all of { r0, r1, r2, r3 } are
2126 call-clobbered registers, there is no need to tell the compiler
2127 explicitly that those registers are clobbered by these
2128 routines. */
2129 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2130 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2132 /* For SImode division the ABI provides div-without-mod routines,
2133 which are faster. */
2134 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2135 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2137 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2138 divmod libcalls instead. */
2139 set_optab_libfunc (smod_optab, DImode, NULL);
2140 set_optab_libfunc (umod_optab, DImode, NULL);
2141 set_optab_libfunc (smod_optab, SImode, NULL);
2142 set_optab_libfunc (umod_optab, SImode, NULL);
2144 /* Half-precision float operations. The compiler handles all operations
2145 with NULL libfuncs by converting to SFmode.  */
2146 switch (arm_fp16_format)
2148 case ARM_FP16_FORMAT_IEEE:
2149 case ARM_FP16_FORMAT_ALTERNATIVE:
2151 /* Conversions. */
2152 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2153 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2154 ? "__gnu_f2h_ieee"
2155 : "__gnu_f2h_alternative"));
2156 set_conv_libfunc (sext_optab, SFmode, HFmode,
2157 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2158 ? "__gnu_h2f_ieee"
2159 : "__gnu_h2f_alternative"));
2161 /* Arithmetic. */
2162 set_optab_libfunc (add_optab, HFmode, NULL);
2163 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2164 set_optab_libfunc (smul_optab, HFmode, NULL);
2165 set_optab_libfunc (neg_optab, HFmode, NULL);
2166 set_optab_libfunc (sub_optab, HFmode, NULL);
2168 /* Comparisons. */
2169 set_optab_libfunc (eq_optab, HFmode, NULL);
2170 set_optab_libfunc (ne_optab, HFmode, NULL);
2171 set_optab_libfunc (lt_optab, HFmode, NULL);
2172 set_optab_libfunc (le_optab, HFmode, NULL);
2173 set_optab_libfunc (ge_optab, HFmode, NULL);
2174 set_optab_libfunc (gt_optab, HFmode, NULL);
2175 set_optab_libfunc (unord_optab, HFmode, NULL);
2176 break;
2178 default:
2179 break;
2182 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2184 const arm_fixed_mode_set fixed_arith_modes[] =
2186 { QQmode, "qq" },
2187 { UQQmode, "uqq" },
2188 { HQmode, "hq" },
2189 { UHQmode, "uhq" },
2190 { SQmode, "sq" },
2191 { USQmode, "usq" },
2192 { DQmode, "dq" },
2193 { UDQmode, "udq" },
2194 { TQmode, "tq" },
2195 { UTQmode, "utq" },
2196 { HAmode, "ha" },
2197 { UHAmode, "uha" },
2198 { SAmode, "sa" },
2199 { USAmode, "usa" },
2200 { DAmode, "da" },
2201 { UDAmode, "uda" },
2202 { TAmode, "ta" },
2203 { UTAmode, "uta" }
2205 const arm_fixed_mode_set fixed_conv_modes[] =
2207 { QQmode, "qq" },
2208 { UQQmode, "uqq" },
2209 { HQmode, "hq" },
2210 { UHQmode, "uhq" },
2211 { SQmode, "sq" },
2212 { USQmode, "usq" },
2213 { DQmode, "dq" },
2214 { UDQmode, "udq" },
2215 { TQmode, "tq" },
2216 { UTQmode, "utq" },
2217 { HAmode, "ha" },
2218 { UHAmode, "uha" },
2219 { SAmode, "sa" },
2220 { USAmode, "usa" },
2221 { DAmode, "da" },
2222 { UDAmode, "uda" },
2223 { TAmode, "ta" },
2224 { UTAmode, "uta" },
2225 { QImode, "qi" },
2226 { HImode, "hi" },
2227 { SImode, "si" },
2228 { DImode, "di" },
2229 { TImode, "ti" },
2230 { SFmode, "sf" },
2231 { DFmode, "df" }
2233 unsigned int i, j;
2235 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2237 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2238 "add", fixed_arith_modes[i].name, 3);
2239 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2240 "ssadd", fixed_arith_modes[i].name, 3);
2241 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2242 "usadd", fixed_arith_modes[i].name, 3);
2243 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2244 "sub", fixed_arith_modes[i].name, 3);
2245 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2246 "sssub", fixed_arith_modes[i].name, 3);
2247 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2248 "ussub", fixed_arith_modes[i].name, 3);
2249 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2250 "mul", fixed_arith_modes[i].name, 3);
2251 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2252 "ssmul", fixed_arith_modes[i].name, 3);
2253 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2254 "usmul", fixed_arith_modes[i].name, 3);
2255 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2256 "div", fixed_arith_modes[i].name, 3);
2257 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2258 "udiv", fixed_arith_modes[i].name, 3);
2259 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2260 "ssdiv", fixed_arith_modes[i].name, 3);
2261 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2262 "usdiv", fixed_arith_modes[i].name, 3);
2263 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2264 "neg", fixed_arith_modes[i].name, 2);
2265 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2266 "ssneg", fixed_arith_modes[i].name, 2);
2267 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2268 "usneg", fixed_arith_modes[i].name, 2);
2269 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2270 "ashl", fixed_arith_modes[i].name, 3);
2271 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2272 "ashr", fixed_arith_modes[i].name, 3);
2273 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2274 "lshr", fixed_arith_modes[i].name, 3);
2275 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2276 "ssashl", fixed_arith_modes[i].name, 3);
2277 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2278 "usashl", fixed_arith_modes[i].name, 3);
2279 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2280 "cmp", fixed_arith_modes[i].name, 2);
2283 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2284 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2286 if (i == j
2287 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2288 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2289 continue;
2291 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2292 fixed_conv_modes[j].mode, "fract",
2293 fixed_conv_modes[i].name,
2294 fixed_conv_modes[j].name);
2295 arm_set_fixed_conv_libfunc (satfract_optab,
2296 fixed_conv_modes[i].mode,
2297 fixed_conv_modes[j].mode, "satfract",
2298 fixed_conv_modes[i].name,
2299 fixed_conv_modes[j].name);
2300 arm_set_fixed_conv_libfunc (fractuns_optab,
2301 fixed_conv_modes[i].mode,
2302 fixed_conv_modes[j].mode, "fractuns",
2303 fixed_conv_modes[i].name,
2304 fixed_conv_modes[j].name);
2305 arm_set_fixed_conv_libfunc (satfractuns_optab,
2306 fixed_conv_modes[i].mode,
2307 fixed_conv_modes[j].mode, "satfractuns",
2308 fixed_conv_modes[i].name,
2309 fixed_conv_modes[j].name);
2313 if (TARGET_AAPCS_BASED)
2314 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2317 /* On AAPCS systems, this is the "struct __va_list". */
2318 static GTY(()) tree va_list_type;
2320 /* Return the type to use as __builtin_va_list. */
2321 static tree
2322 arm_build_builtin_va_list (void)
2324 tree va_list_name;
2325 tree ap_field;
2327 if (!TARGET_AAPCS_BASED)
2328 return std_build_builtin_va_list ();
2330 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2331 defined as:
2333 struct __va_list
2335 void *__ap;
2338 The C Library ABI further reinforces this definition in \S
2339 4.1.
2341 We must follow this definition exactly. The structure tag
2342 name is visible in C++ mangled names, and thus forms a part
2343 of the ABI. The field name may be used by people who
2344 #include <stdarg.h>. */
2345 /* Create the type. */
2346 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2347 /* Give it the required name. */
2348 va_list_name = build_decl (BUILTINS_LOCATION,
2349 TYPE_DECL,
2350 get_identifier ("__va_list"),
2351 va_list_type);
2352 DECL_ARTIFICIAL (va_list_name) = 1;
2353 TYPE_NAME (va_list_type) = va_list_name;
2354 TYPE_STUB_DECL (va_list_type) = va_list_name;
2355 /* Create the __ap field. */
2356 ap_field = build_decl (BUILTINS_LOCATION,
2357 FIELD_DECL,
2358 get_identifier ("__ap"),
2359 ptr_type_node);
2360 DECL_ARTIFICIAL (ap_field) = 1;
2361 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2362 TYPE_FIELDS (va_list_type) = ap_field;
2363 /* Compute its layout. */
2364 layout_type (va_list_type);
2366 return va_list_type;
2369 /* Return an expression of type "void *" pointing to the next
2370 available argument in a variable-argument list. VALIST is the
2371 user-level va_list object, of type __builtin_va_list. */
2372 static tree
2373 arm_extract_valist_ptr (tree valist)
2375 if (TREE_TYPE (valist) == error_mark_node)
2376 return error_mark_node;
2378 /* On an AAPCS target, the pointer is stored within "struct
2379 va_list". */
2380 if (TARGET_AAPCS_BASED)
2382 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2383 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2384 valist, ap_field, NULL_TREE);
2387 return valist;
2390 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2391 static void
2392 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2394 valist = arm_extract_valist_ptr (valist);
2395 std_expand_builtin_va_start (valist, nextarg);
2398 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2399 static tree
2400 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2401 gimple_seq *post_p)
2403 valist = arm_extract_valist_ptr (valist);
2404 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2407 /* Fix up any incompatible options that the user has specified. */
2408 static void
2409 arm_option_override (void)
2411 if (global_options_set.x_arm_arch_option)
2412 arm_selected_arch = &all_architectures[arm_arch_option];
2414 if (global_options_set.x_arm_cpu_option)
2416 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2417 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2420 if (global_options_set.x_arm_tune_option)
2421 arm_selected_tune = &all_cores[(int) arm_tune_option];
2423 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2424 SUBTARGET_OVERRIDE_OPTIONS;
2425 #endif
2427 if (arm_selected_arch)
2429 if (arm_selected_cpu)
2431 /* Check for conflict between mcpu and march. */
2432 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2434 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2435 arm_selected_cpu->name, arm_selected_arch->name);
2436 /* -march wins for code generation.
2437 -mcpu wins for default tuning. */
2438 if (!arm_selected_tune)
2439 arm_selected_tune = arm_selected_cpu;
2441 arm_selected_cpu = arm_selected_arch;
2443 else
2444 /* -mcpu wins. */
2445 arm_selected_arch = NULL;
2447 else
2448 /* Pick a CPU based on the architecture. */
2449 arm_selected_cpu = arm_selected_arch;
2452 /* If the user did not specify a processor, choose one for them. */
2453 if (!arm_selected_cpu)
2455 const struct processors * sel;
2456 unsigned int sought;
2458 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2459 if (!arm_selected_cpu->name)
2461 #ifdef SUBTARGET_CPU_DEFAULT
2462 /* Use the subtarget default CPU if none was specified by
2463 configure. */
2464 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2465 #endif
2466 /* Default to ARM6. */
2467 if (!arm_selected_cpu->name)
2468 arm_selected_cpu = &all_cores[arm6];
2471 sel = arm_selected_cpu;
2472 insn_flags = sel->flags;
2474 /* Now check to see if the user has specified some command line
2475 switches that require certain abilities from the cpu.  */
2476 sought = 0;
2478 if (TARGET_INTERWORK || TARGET_THUMB)
2480 sought |= (FL_THUMB | FL_MODE32);
2482 /* There are no ARM processors that support both APCS-26 and
2483 interworking. Therefore we force FL_MODE26 to be removed
2484 from insn_flags here (if it was set), so that the search
2485 below will always be able to find a compatible processor. */
2486 insn_flags &= ~FL_MODE26;
2489 if (sought != 0 && ((sought & insn_flags) != sought))
2491 /* Try to locate a CPU type that supports all of the abilities
2492 of the default CPU, plus the extra abilities requested by
2493 the user. */
2494 for (sel = all_cores; sel->name != NULL; sel++)
2495 if ((sel->flags & sought) == (sought | insn_flags))
2496 break;
2498 if (sel->name == NULL)
2500 unsigned current_bit_count = 0;
2501 const struct processors * best_fit = NULL;
2503 /* Ideally we would like to issue an error message here
2504 saying that it was not possible to find a CPU compatible
2505 with the default CPU, but which also supports the command
2506 line options specified by the programmer, and so they
2507 ought to use the -mcpu=<name> command line option to
2508 override the default CPU type.
2510 If we cannot find a cpu that has both the
2511 characteristics of the default cpu and the given
2512 command line options we scan the array again looking
2513 for a best match. */
2514 for (sel = all_cores; sel->name != NULL; sel++)
2515 if ((sel->flags & sought) == sought)
2517 unsigned count;
2519 count = bit_count (sel->flags & insn_flags);
2521 if (count >= current_bit_count)
2523 best_fit = sel;
2524 current_bit_count = count;
2528 gcc_assert (best_fit);
2529 sel = best_fit;
2532 arm_selected_cpu = sel;
2536 gcc_assert (arm_selected_cpu);
2537 /* The selected cpu may be an architecture, so look up tuning by core ID.  */
2538 if (!arm_selected_tune)
2539 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2541 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2542 insn_flags = arm_selected_cpu->flags;
2543 arm_base_arch = arm_selected_cpu->base_arch;
2545 arm_tune = arm_selected_tune->core;
2546 tune_flags = arm_selected_tune->flags;
2547 current_tune = arm_selected_tune->tune;
2549 /* Make sure that the processor choice does not conflict with any of the
2550 other command line choices. */
2551 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2552 error ("target CPU does not support ARM mode");
2554 /* BPABI targets use linker tricks to allow interworking on cores
2555 without thumb support. */
2556 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2558 warning (0, "target CPU does not support interworking");
2559 target_flags &= ~MASK_INTERWORK;
2562 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2564 warning (0, "target CPU does not support THUMB instructions");
2565 target_flags &= ~MASK_THUMB;
2568 if (TARGET_APCS_FRAME && TARGET_THUMB)
2570 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2571 target_flags &= ~MASK_APCS_FRAME;
2574 /* Callee super interworking implies thumb interworking. Adding
2575 this to the flags here simplifies the logic elsewhere. */
2576 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2577 target_flags |= MASK_INTERWORK;
2579 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2580 from here where no function is being compiled currently. */
2581 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2582 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2584 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2585 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2587 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2589 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2590 target_flags |= MASK_APCS_FRAME;
2593 if (TARGET_POKE_FUNCTION_NAME)
2594 target_flags |= MASK_APCS_FRAME;
2596 if (TARGET_APCS_REENT && flag_pic)
2597 error ("-fpic and -mapcs-reent are incompatible");
2599 if (TARGET_APCS_REENT)
2600 warning (0, "APCS reentrant code not supported. Ignored");
2602 /* If this target is normally configured to use APCS frames, warn if they
2603 are turned off and debugging is turned on. */
2604 if (TARGET_ARM
2605 && write_symbols != NO_DEBUG
2606 && !TARGET_APCS_FRAME
2607 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2608 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2610 if (TARGET_APCS_FLOAT)
2611 warning (0, "passing floating point arguments in fp regs not yet supported");
2613 if (TARGET_LITTLE_WORDS)
2614 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2615 "will be removed in a future release");
2617 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2618 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2619 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2620 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2621 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2622 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2623 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2624 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2625 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2626 arm_arch6m = arm_arch6 && !arm_arch_notm;
2627 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2628 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2629 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2630 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2631 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2633 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2634 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2635 thumb_code = TARGET_ARM == 0;
2636 thumb1_code = TARGET_THUMB1 != 0;
2637 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2638 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2639 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2640 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2641 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2642 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2643 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2644 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2645 if (arm_restrict_it == 2)
2646 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2648 if (!TARGET_THUMB2)
2649 arm_restrict_it = 0;
2651 /* If we are not using the default (ARM mode) section anchor offset
2652 ranges, then set the correct ranges now. */
2653 if (TARGET_THUMB1)
2655 /* Thumb-1 LDR instructions cannot have negative offsets.
2656 Permissible positive offset ranges are 5-bit (for byte loads),
2657 6-bit (for halfword loads), or 7-bit (for word loads).
2658 Empirical results suggest a 7-bit anchor range gives the best
2659 overall code size. */
2660 targetm.min_anchor_offset = 0;
2661 targetm.max_anchor_offset = 127;
2663 else if (TARGET_THUMB2)
2665 /* The minimum is set such that the total size of the block
2666 for a particular anchor is 248 + 1 + 4095 bytes, which is
2667 divisible by eight, ensuring natural spacing of anchors. */
2668 targetm.min_anchor_offset = -248;
2669 targetm.max_anchor_offset = 4095;
2672 /* V5 code we generate is completely interworking capable, so we turn off
2673 TARGET_INTERWORK here to avoid many tests later on. */
2675 /* XXX However, we must pass the right pre-processor defines to CPP
2676 or GLD can get confused. This is a hack. */
2677 if (TARGET_INTERWORK)
2678 arm_cpp_interwork = 1;
2680 if (arm_arch5)
2681 target_flags &= ~MASK_INTERWORK;
2683 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2684 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2686 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2687 error ("iwmmxt abi requires an iwmmxt capable cpu");
2689 if (!global_options_set.x_arm_fpu_index)
2691 const char *target_fpu_name;
2692 bool ok;
2694 #ifdef FPUTYPE_DEFAULT
2695 target_fpu_name = FPUTYPE_DEFAULT;
2696 #else
2697 target_fpu_name = "vfp";
2698 #endif
2700 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2701 CL_TARGET);
2702 gcc_assert (ok);
2705 arm_fpu_desc = &all_fpus[arm_fpu_index];
2707 switch (arm_fpu_desc->model)
2709 case ARM_FP_MODEL_VFP:
2710 arm_fpu_attr = FPU_VFP;
2711 break;
2713 default:
2714 gcc_unreachable();
2717 if (TARGET_AAPCS_BASED)
2719 if (TARGET_CALLER_INTERWORKING)
2720 error ("AAPCS does not support -mcaller-super-interworking");
2721 else
2722 if (TARGET_CALLEE_INTERWORKING)
2723 error ("AAPCS does not support -mcallee-super-interworking");
2726 /* iWMMXt and NEON are incompatible. */
2727 if (TARGET_IWMMXT && TARGET_NEON)
2728 error ("iWMMXt and NEON are incompatible");
2730 /* iWMMXt unsupported under Thumb mode. */
2731 if (TARGET_THUMB && TARGET_IWMMXT)
2732 error ("iWMMXt unsupported under Thumb mode");
2734 /* __fp16 support currently assumes the core has ldrh. */
2735 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2736 sorry ("__fp16 and no ldrh");
2738 /* If soft-float is specified then don't use FPU. */
2739 if (TARGET_SOFT_FLOAT)
2740 arm_fpu_attr = FPU_NONE;
2742 if (TARGET_AAPCS_BASED)
2744 if (arm_abi == ARM_ABI_IWMMXT)
2745 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2746 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2747 && TARGET_HARD_FLOAT
2748 && TARGET_VFP)
2749 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2750 else
2751 arm_pcs_default = ARM_PCS_AAPCS;
2753 else
2755 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2756 sorry ("-mfloat-abi=hard and VFP");
2758 if (arm_abi == ARM_ABI_APCS)
2759 arm_pcs_default = ARM_PCS_APCS;
2760 else
2761 arm_pcs_default = ARM_PCS_ATPCS;
2764 /* For arm2/3 there is no need to do any scheduling if we are doing
2765 software floating-point. */
2766 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2767 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2769 /* Use the cp15 method if it is available. */
2770 if (target_thread_pointer == TP_AUTO)
2772 if (arm_arch6k && !TARGET_THUMB1)
2773 target_thread_pointer = TP_CP15;
2774 else
2775 target_thread_pointer = TP_SOFT;
2778 if (TARGET_HARD_TP && TARGET_THUMB1)
2779 error ("can not use -mtp=cp15 with 16-bit Thumb");
2781 /* Override the default structure alignment for AAPCS ABI. */
2782 if (!global_options_set.x_arm_structure_size_boundary)
2784 if (TARGET_AAPCS_BASED)
2785 arm_structure_size_boundary = 8;
2787 else
2789 if (arm_structure_size_boundary != 8
2790 && arm_structure_size_boundary != 32
2791 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2793 if (ARM_DOUBLEWORD_ALIGN)
2794 warning (0,
2795 "structure size boundary can only be set to 8, 32 or 64");
2796 else
2797 warning (0, "structure size boundary can only be set to 8 or 32");
2798 arm_structure_size_boundary
2799 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2803 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2805 error ("RTP PIC is incompatible with Thumb");
2806 flag_pic = 0;
2809 /* If stack checking is disabled, we can use r10 as the PIC register,
2810 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2811 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2813 if (TARGET_VXWORKS_RTP)
2814 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2815 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2818 if (flag_pic && TARGET_VXWORKS_RTP)
2819 arm_pic_register = 9;
2821 if (arm_pic_register_string != NULL)
2823 int pic_register = decode_reg_name (arm_pic_register_string);
2825 if (!flag_pic)
2826 warning (0, "-mpic-register= is useless without -fpic");
2828 /* Prevent the user from choosing an obviously stupid PIC register. */
2829 else if (pic_register < 0 || call_used_regs[pic_register]
2830 || pic_register == HARD_FRAME_POINTER_REGNUM
2831 || pic_register == STACK_POINTER_REGNUM
2832 || pic_register >= PC_REGNUM
2833 || (TARGET_VXWORKS_RTP
2834 && (unsigned int) pic_register != arm_pic_register))
2835 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2836 else
2837 arm_pic_register = pic_register;
2840 if (TARGET_VXWORKS_RTP
2841 && !global_options_set.x_arm_pic_data_is_text_relative)
2842 arm_pic_data_is_text_relative = 0;
2844 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2845 if (fix_cm3_ldrd == 2)
2847 if (arm_selected_cpu->core == cortexm3)
2848 fix_cm3_ldrd = 1;
2849 else
2850 fix_cm3_ldrd = 0;
2853 /* Enable -munaligned-access by default for
2854 - all ARMv6 architecture-based processors
2855 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2856 - ARMv8 architecture-based processors.
2858 Disable -munaligned-access by default for
2859 - all pre-ARMv6 architecture-based processors
2860 - ARMv6-M architecture-based processors. */
2862 if (unaligned_access == 2)
2864 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2865 unaligned_access = 1;
2866 else
2867 unaligned_access = 0;
2869 else if (unaligned_access == 1
2870 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2872 warning (0, "target CPU does not support unaligned accesses");
2873 unaligned_access = 0;
2876 if (TARGET_THUMB1 && flag_schedule_insns)
2878 /* Don't warn since it's on by default in -O2. */
2879 flag_schedule_insns = 0;
2882 if (optimize_size)
2884 /* If optimizing for size, bump the number of instructions that we
2885 are prepared to conditionally execute (even on a StrongARM). */
2886 max_insns_skipped = 6;
2888 else
2889 max_insns_skipped = current_tune->max_insns_skipped;
2891 /* Hot/Cold partitioning is not currently supported, since we can't
2892 handle literal pool placement in that case. */
2893 if (flag_reorder_blocks_and_partition)
2895 inform (input_location,
2896 "-freorder-blocks-and-partition not supported on this architecture");
2897 flag_reorder_blocks_and_partition = 0;
2898 flag_reorder_blocks = 1;
2901 if (flag_pic)
2902 /* Hoisting PIC address calculations more aggressively provides a small,
2903 but measurable, size reduction for PIC code. Therefore, we decrease
2904 the bar for unrestricted expression hoisting to the cost of PIC address
2905 calculation, which is 2 instructions. */
2906 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2907 global_options.x_param_values,
2908 global_options_set.x_param_values);
2910 /* ARM EABI defaults to strict volatile bitfields. */
2911 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2912 && abi_version_at_least(2))
2913 flag_strict_volatile_bitfields = 1;
2915 /* Enable software prefetching at -O3 for CPUs that support prefetch, and for
2916 which we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more).  */
2917 if (flag_prefetch_loop_arrays < 0
2918 && HAVE_prefetch
2919 && optimize >= 3
2920 && current_tune->num_prefetch_slots > 0)
2921 flag_prefetch_loop_arrays = 1;
2923 /* Set up parameters to be used in the prefetching algorithm.  Do not override the
2924 defaults unless we are tuning for a core we have researched values for. */
2925 if (current_tune->num_prefetch_slots > 0)
2926 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2927 current_tune->num_prefetch_slots,
2928 global_options.x_param_values,
2929 global_options_set.x_param_values);
2930 if (current_tune->l1_cache_line_size >= 0)
2931 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2932 current_tune->l1_cache_line_size,
2933 global_options.x_param_values,
2934 global_options_set.x_param_values);
2935 if (current_tune->l1_cache_size >= 0)
2936 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2937 current_tune->l1_cache_size,
2938 global_options.x_param_values,
2939 global_options_set.x_param_values);
2941 /* Use Neon rather than core registers to perform 64-bit
2942 operations.  */
2943 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2944 if (use_neon_for_64bits == 1)
2945 prefer_neon_for_64bits = true;
2947 /* Use the alternative scheduling-pressure algorithm by default. */
2948 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
2949 global_options.x_param_values,
2950 global_options_set.x_param_values);
2952 /* Disable shrink-wrap when optimizing function for size, since it tends to
2953 generate additional returns. */
2954 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2955 flag_shrink_wrap = false;
2956 /* TBD: Dwarf info for apcs frame is not handled yet. */
2957 if (TARGET_APCS_FRAME)
2958 flag_shrink_wrap = false;
2960 /* We only support -mslow-flash-data on armv7-m targets. */
2961 if (target_slow_flash_data
2962 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2963 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
2964 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2966 /* Currently, for slow flash data, we just disable literal pools. */
2967 if (target_slow_flash_data)
2968 arm_disable_literal_pool = true;
2970 /* Register global variables with the garbage collector. */
2971 arm_add_gc_roots ();
2974 static void
2975 arm_add_gc_roots (void)
2977 gcc_obstack_init(&minipool_obstack);
2978 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2981 /* A table of known ARM exception types.
2982 For use with the interrupt function attribute. */
2984 typedef struct
2986 const char *const arg;
2987 const unsigned long return_value;
2989 isr_attribute_arg;
2991 static const isr_attribute_arg isr_attribute_args [] =
2993 { "IRQ", ARM_FT_ISR },
2994 { "irq", ARM_FT_ISR },
2995 { "FIQ", ARM_FT_FIQ },
2996 { "fiq", ARM_FT_FIQ },
2997 { "ABORT", ARM_FT_ISR },
2998 { "abort", ARM_FT_ISR },
2999 { "ABORT", ARM_FT_ISR },
3000 { "abort", ARM_FT_ISR },
3001 { "UNDEF", ARM_FT_EXCEPTION },
3002 { "undef", ARM_FT_EXCEPTION },
3003 { "SWI", ARM_FT_EXCEPTION },
3004 { "swi", ARM_FT_EXCEPTION },
3005 { NULL, ARM_FT_NORMAL }
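/* For illustration (the handler name is hypothetical), a function
   declared as

     void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);

   has its attribute argument matched against this table; "isr" is
   accepted as a synonym for "interrupt" (see arm_compute_func_type
   below).  */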
3008 /* Returns the (interrupt) function type of the current
3009 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3011 static unsigned long
3012 arm_isr_value (tree argument)
3014 const isr_attribute_arg * ptr;
3015 const char * arg;
3017 if (!arm_arch_notm)
3018 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3020 /* No argument - default to IRQ. */
3021 if (argument == NULL_TREE)
3022 return ARM_FT_ISR;
3024 /* Get the value of the argument. */
3025 if (TREE_VALUE (argument) == NULL_TREE
3026 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3027 return ARM_FT_UNKNOWN;
3029 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3031 /* Check it against the list of known arguments. */
3032 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3033 if (streq (arg, ptr->arg))
3034 return ptr->return_value;
3036 /* An unrecognized interrupt type. */
3037 return ARM_FT_UNKNOWN;
3040 /* Computes the type of the current function. */
3042 static unsigned long
3043 arm_compute_func_type (void)
3045 unsigned long type = ARM_FT_UNKNOWN;
3046 tree a;
3047 tree attr;
3049 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3051 /* Decide if the current function is volatile. Such functions
3052 never return, and many memory cycles can be saved by not storing
3053 register values that will never be needed again. This optimization
3054 was added to speed up context switching in a kernel application. */
3055 if (optimize > 0
3056 && (TREE_NOTHROW (current_function_decl)
3057 || !(flag_unwind_tables
3058 || (flag_exceptions
3059 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3060 && TREE_THIS_VOLATILE (current_function_decl))
3061 type |= ARM_FT_VOLATILE;
3063 if (cfun->static_chain_decl != NULL)
3064 type |= ARM_FT_NESTED;
3066 attr = DECL_ATTRIBUTES (current_function_decl);
3068 a = lookup_attribute ("naked", attr);
3069 if (a != NULL_TREE)
3070 type |= ARM_FT_NAKED;
3072 a = lookup_attribute ("isr", attr);
3073 if (a == NULL_TREE)
3074 a = lookup_attribute ("interrupt", attr);
3076 if (a == NULL_TREE)
3077 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3078 else
3079 type |= arm_isr_value (TREE_VALUE (a));
3081 return type;
3084 /* Returns the type of the current function. */
3086 unsigned long
3087 arm_current_func_type (void)
3089 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3090 cfun->machine->func_type = arm_compute_func_type ();
3092 return cfun->machine->func_type;
3095 bool
3096 arm_allocate_stack_slots_for_args (void)
3098 /* Naked functions should not allocate stack slots for arguments. */
3099 return !IS_NAKED (arm_current_func_type ());
3102 static bool
3103 arm_warn_func_return (tree decl)
3105 /* Naked functions are implemented entirely in assembly, including the
3106 return sequence, so suppress warnings about this. */
3107 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3111 /* Output assembler code for a block containing the constant parts
3112 of a trampoline, leaving space for the variable parts.
3114 On the ARM, (if r8 is the static chain regnum, and remembering that
3115 referencing pc adds an offset of 8) the trampoline looks like:
3116 ldr r8, [pc, #0]
3117 ldr pc, [pc]
3118 .word static chain value
3119 .word function's address
3120 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3122 static void
3123 arm_asm_trampoline_template (FILE *f)
3125 if (TARGET_ARM)
3127 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3128 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3130 else if (TARGET_THUMB2)
3132 /* The Thumb-2 trampoline is similar to the arm implementation.
3133 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3134 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3135 STATIC_CHAIN_REGNUM, PC_REGNUM);
3136 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3138 else
3140 ASM_OUTPUT_ALIGN (f, 2);
3141 fprintf (f, "\t.code\t16\n");
3142 fprintf (f, ".Ltrampoline_start:\n");
3143 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3144 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3145 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3146 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3147 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3148 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3150 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3151 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
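/* A note on the offsets above: in ARM state the PC reads as the
   address of the current instruction plus 8, so the first load (at
   offset 0) fetches the word at offset 8 and the second (at offset 4)
   fetches the word at offset 12, which are the static chain and
   function address slots written by arm_trampoline_init below.  In
   Thumb state the PC reads as the instruction address plus 4, hence
   the #4 offsets in the Thumb-2 variant.  */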
3154 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3156 static void
3157 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3159 rtx fnaddr, mem, a_tramp;
3161 emit_block_move (m_tramp, assemble_trampoline_template (),
3162 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3164 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3165 emit_move_insn (mem, chain_value);
3167 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3168 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3169 emit_move_insn (mem, fnaddr);
3171 a_tramp = XEXP (m_tramp, 0);
3172 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3173 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3174 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3177 /* Thumb trampolines should be entered in thumb mode, so set
3178 the bottom bit of the address. */
3180 static rtx
3181 arm_trampoline_adjust_address (rtx addr)
3183 if (TARGET_THUMB)
3184 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3185 NULL, 0, OPTAB_LIB_WIDEN);
3186 return addr;
3189 /* Return 1 if it is possible to return using a single instruction.
3190 If SIBLING is non-null, this is a test for a return before a sibling
3191 call. SIBLING is the call insn, so we can examine its register usage. */
3194 use_return_insn (int iscond, rtx sibling)
3196 int regno;
3197 unsigned int func_type;
3198 unsigned long saved_int_regs;
3199 unsigned HOST_WIDE_INT stack_adjust;
3200 arm_stack_offsets *offsets;
3202 /* Never use a return instruction before reload has run. */
3203 if (!reload_completed)
3204 return 0;
3206 func_type = arm_current_func_type ();
3208 /* Naked, volatile and stack alignment functions need special
3209 consideration. */
3210 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3211 return 0;
3213 /* So do interrupt functions that use the frame pointer and Thumb
3214 interrupt functions. */
3215 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3216 return 0;
3218 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3219 && !optimize_function_for_size_p (cfun))
3220 return 0;
3222 offsets = arm_get_frame_offsets ();
3223 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3225 /* As do variadic functions. */
3226 if (crtl->args.pretend_args_size
3227 || cfun->machine->uses_anonymous_args
3228 /* Or if the function calls __builtin_eh_return () */
3229 || crtl->calls_eh_return
3230 /* Or if the function calls alloca */
3231 || cfun->calls_alloca
3232 /* Or if there is a stack adjustment. However, if the stack pointer
3233 is saved on the stack, we can use a pre-incrementing stack load. */
3234 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3235 && stack_adjust == 4)))
3236 return 0;
3238 saved_int_regs = offsets->saved_regs_mask;
3240 /* Unfortunately, the insn
3242 ldmib sp, {..., sp, ...}
3244 triggers a bug on most SA-110 based devices, such that the stack
3245 pointer won't be correctly restored if the instruction takes a
3246 page fault. We work around this problem by popping r3 along with
3247 the other registers, since that is never slower than executing
3248 another instruction.
3250 We test for !arm_arch5 here, because code for any architecture
3251 less than this could potentially be run on one of the buggy
3252 chips. */
3253 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3255 /* Validate that r3 is a call-clobbered register (always true in
3256 the default abi) ... */
3257 if (!call_used_regs[3])
3258 return 0;
3260 /* ... that it isn't being used for a return value ... */
3261 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3262 return 0;
3264 /* ... or for a tail-call argument ... */
3265 if (sibling)
3267 gcc_assert (CALL_P (sibling));
3269 if (find_regno_fusage (sibling, USE, 3))
3270 return 0;
3273 /* ... and that there are no call-saved registers in r0-r2
3274 (always true in the default ABI). */
3275 if (saved_int_regs & 0x7)
3276 return 0;
3279 /* Can't be done if interworking with Thumb, and any registers have been
3280 stacked. */
3281 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3282 return 0;
3284 /* On StrongARM, conditional returns are expensive if they aren't
3285 taken and multiple registers have been stacked. */
3286 if (iscond && arm_tune_strongarm)
3288 /* Conditional return when just the LR is stored is a simple
3289 conditional-load instruction, which is not expensive.  */
3290 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3291 return 0;
3293 if (flag_pic
3294 && arm_pic_register != INVALID_REGNUM
3295 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3296 return 0;
3299 /* If there are saved registers but the LR isn't saved, then we need
3300 two instructions for the return. */
3301 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3302 return 0;
3304 /* Can't be done if any of the VFP regs are pushed,
3305 since this also requires an insn. */
3306 if (TARGET_HARD_FLOAT && TARGET_VFP)
3307 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3308 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3309 return 0;
3311 if (TARGET_REALLY_IWMMXT)
3312 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3313 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3314 return 0;
3316 return 1;
3319 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3320 shrink-wrapping if possible. This is the case if we need to emit a
3321 prologue, which we can test by looking at the offsets. */
3322 bool
3323 use_simple_return_p (void)
3325 arm_stack_offsets *offsets;
3327 offsets = arm_get_frame_offsets ();
3328 return offsets->outgoing_args != 0;
3331 /* Return TRUE if int I is a valid immediate ARM constant. */
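/* In ARM state a valid immediate is an 8-bit value rotated right by an
   even amount: 0xff, 0xff0 and 0xf000000f are representable, for
   example, while 0x101 (a 9-bit span) and 0x1fe (which would need an
   odd rotation) are not.  In Thumb-2 the 8-bit value may sit at any
   position, and the replicated patterns 0x00XY00XY, 0xXY00XY00 and
   0xXYXYXYXY are also accepted, as checked in the non-ARM branch
   below.  */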
3334 const_ok_for_arm (HOST_WIDE_INT i)
3336 int lowbit;
3338 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3339 be all zero, or all one. */
3340 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3341 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3342 != ((~(unsigned HOST_WIDE_INT) 0)
3343 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3344 return FALSE;
3346 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3348 /* Fast return for 0 and small values. We must do this for zero, since
3349 the code below can't handle that one case. */
3350 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3351 return TRUE;
3353 /* Get the number of trailing zeros. */
3354 lowbit = ffs((int) i) - 1;
3356 /* Only even shifts are allowed in ARM mode so round down to the
3357 nearest even number. */
3358 if (TARGET_ARM)
3359 lowbit &= ~1;
3361 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3362 return TRUE;
3364 if (TARGET_ARM)
3366 /* Allow rotated constants in ARM mode. */
3367 if (lowbit <= 4
3368 && ((i & ~0xc000003f) == 0
3369 || (i & ~0xf000000f) == 0
3370 || (i & ~0xfc000003) == 0))
3371 return TRUE;
3373 else
3375 HOST_WIDE_INT v;
3377 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3378 v = i & 0xff;
3379 v |= v << 16;
3380 if (i == v || i == (v | (v << 8)))
3381 return TRUE;
3383 /* Allow repeated pattern 0xXY00XY00. */
3384 v = i & 0xff00;
3385 v |= v << 16;
3386 if (i == v)
3387 return TRUE;
3390 return FALSE;
3393 /* Return true if I is a valid constant for the operation CODE. */
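/* For example, a SET of 0xffffff00 qualifies because its complement 0xff
   can be loaded with MVN, and a PLUS of -256 qualifies because the negated
   value 0x100 is a valid immediate (the addition can become a subtract).  */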
3395 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3397 if (const_ok_for_arm (i))
3398 return 1;
3400 switch (code)
3402 case SET:
3403 /* See if we can use movw. */
3404 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3405 return 1;
3406 else
3407 /* Otherwise, try mvn. */
3408 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3410 case PLUS:
3411 /* See if we can use addw or subw. */
3412 if (TARGET_THUMB2
3413 && ((i & 0xfffff000) == 0
3414 || ((-i) & 0xfffff000) == 0))
3415 return 1;
3416 /* else fall through. */
3418 case COMPARE:
3419 case EQ:
3420 case NE:
3421 case GT:
3422 case LE:
3423 case LT:
3424 case GE:
3425 case GEU:
3426 case LTU:
3427 case GTU:
3428 case LEU:
3429 case UNORDERED:
3430 case ORDERED:
3431 case UNEQ:
3432 case UNGE:
3433 case UNLT:
3434 case UNGT:
3435 case UNLE:
3436 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3438 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3439 case XOR:
3440 return 0;
3442 case IOR:
3443 if (TARGET_THUMB2)
3444 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3445 return 0;
3447 case AND:
3448 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3450 default:
3451 gcc_unreachable ();
3455 /* Return true if I is a valid DImode constant for the operation CODE. */
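/* Each 32-bit half is checked independently, so e.g. an AND with
   0xffffffff00000000 qualifies: the high half is all ones (no masking
   needed there) and clearing the low half needs only a valid immediate.  */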
3457 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3459 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3460 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3461 rtx hi = GEN_INT (hi_val);
3462 rtx lo = GEN_INT (lo_val);
3464 if (TARGET_THUMB1)
3465 return 0;
3467 switch (code)
3469 case AND:
3470 case IOR:
3471 case XOR:
3472 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3473 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3474 case PLUS:
3475 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3477 default:
3478 return 0;
3482 /* Emit a sequence of insns to handle a large constant.
3483 CODE is the code of the operation required; it can be any of SET, PLUS,
3484 IOR, AND, XOR, MINUS;
3485 MODE is the mode in which the operation is being performed;
3486 VAL is the integer to operate on;
3487 SOURCE is the other operand (a register, or a null-pointer for SET);
3488 SUBTARGETS means it is safe to create scratch registers if that will
3489 either produce a simpler sequence, or we will want to cse the values.
3490 Return value is the number of insns emitted. */
3492 /* ??? Tweak this for thumb2. */
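/* As a rough example: a SET of 0x0000ffff on a core without movw/movt
   typically splits into two insns, e.g.
       mov   rD, #0xff00
       orr   rD, rD, #0xff
   whereas with MOVW available it is emitted as a single movw.  */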
3494 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3495 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3497 rtx cond;
3499 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3500 cond = COND_EXEC_TEST (PATTERN (insn));
3501 else
3502 cond = NULL_RTX;
3504 if (subtargets || code == SET
3505 || (REG_P (target) && REG_P (source)
3506 && REGNO (target) != REGNO (source)))
3508 /* After arm_reorg has been called, we can't fix up expensive
3509 constants by pushing them into memory so we must synthesize
3510 them in-line, regardless of the cost. This is only likely to
3511 be more costly on chips that have load delay slots and we are
3512 compiling without running the scheduler (so no splitting
3513 occurred before the final instruction emission).
3515 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3517 if (!cfun->machine->after_arm_reorg
3518 && !cond
3519 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3520 1, 0)
3521 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3522 + (code != SET))))
3524 if (code == SET)
3526 /* Currently SET is the only monadic value for CODE; all
3527 the rest are dyadic. */
3528 if (TARGET_USE_MOVT)
3529 arm_emit_movpair (target, GEN_INT (val));
3530 else
3531 emit_set_insn (target, GEN_INT (val));
3533 return 1;
3535 else
3537 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3539 if (TARGET_USE_MOVT)
3540 arm_emit_movpair (temp, GEN_INT (val));
3541 else
3542 emit_set_insn (temp, GEN_INT (val));
3544 /* For MINUS, the value is subtracted from, since we never
3545 have subtraction of a constant. */
3546 if (code == MINUS)
3547 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3548 else
3549 emit_set_insn (target,
3550 gen_rtx_fmt_ee (code, mode, source, temp));
3551 return 2;
3556 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3560 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3561 ARM/THUMB2 immediates and add up to VAL.
3562 The function's return value gives the number of insns required. */
3563 static int
3564 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3565 struct four_ints *return_sequence)
3567 int best_consecutive_zeros = 0;
3568 int i;
3569 int best_start = 0;
3570 int insns1, insns2;
3571 struct four_ints tmp_sequence;
3573 /* If we aren't targeting ARM, the best place to start is always at
3574 the bottom, otherwise look more closely. */
3575 if (TARGET_ARM)
3577 for (i = 0; i < 32; i += 2)
3579 int consecutive_zeros = 0;
3581 if (!(val & (3 << i)))
3583 while ((i < 32) && !(val & (3 << i)))
3585 consecutive_zeros += 2;
3586 i += 2;
3588 if (consecutive_zeros > best_consecutive_zeros)
3590 best_consecutive_zeros = consecutive_zeros;
3591 best_start = i - consecutive_zeros;
3593 i -= 2;
3598 /* So long as it won't require any more insns to do so, it's
3599 desirable to emit a small constant (in bits 0...9) in the last
3600 insn. This way there is more chance that it can be combined with
3601 a later addressing insn to form a pre-indexed load or store
3602 operation. Consider:
3604 *((volatile int *)0xe0000100) = 1;
3605 *((volatile int *)0xe0000110) = 2;
3607 We want this to wind up as:
3609 mov rA, #0xe0000000
3610 mov rB, #1
3611 str rB, [rA, #0x100]
3612 mov rB, #2
3613 str rB, [rA, #0x110]
3615 rather than having to synthesize both large constants from scratch.
3617 Therefore, we calculate how many insns would be required to emit
3618 the constant starting from `best_start', and also starting from
3619 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3620 yield a shorter sequence, we may as well use zero. */
3621 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3622 if (best_start != 0
3623 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3625 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3626 if (insns2 <= insns1)
3628 *return_sequence = tmp_sequence;
3629 insns1 = insns2;
3633 return insns1;
3636 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3637 static int
3638 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3639 struct four_ints *return_sequence, int i)
3641 int remainder = val & 0xffffffff;
3642 int insns = 0;
3644 /* Try and find a way of doing the job in either two or three
3645 instructions.
3647 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3648 location. We start at position I. This may be the MSB, or
3649 optimal_immediate_sequence may have positioned it at the largest block
3650 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3651 wrapping around to the top of the word when we drop off the bottom.
3652 In the worst case this code should produce no more than four insns.
3654 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3655 constants, shifted to any arbitrary location. We should always start
3656 at the MSB. */
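  /* For instance, a selected 8-bit block may straddle bit 0: the chunk
     0xc000003f is 0xff rotated right by two, covering bits 30..31 and
     0..5, which is why the loop below bumps I back up by 32 when it
     falls to zero or below.  */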
3659 int end;
3660 unsigned int b1, b2, b3, b4;
3661 unsigned HOST_WIDE_INT result;
3662 int loc;
3664 gcc_assert (insns < 4);
3666 if (i <= 0)
3667 i += 32;
3669 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3670 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3672 loc = i;
3673 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3674 /* We can use addw/subw for the last 12 bits. */
3675 result = remainder;
3676 else
3678 /* Use an 8-bit shifted/rotated immediate. */
3679 end = i - 8;
3680 if (end < 0)
3681 end += 32;
3682 result = remainder & ((0x0ff << end)
3683 | ((i < end) ? (0xff >> (32 - end))
3684 : 0));
3685 i -= 8;
3688 else
3690 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3691 arbitrary shifts. */
3692 i -= TARGET_ARM ? 2 : 1;
3693 continue;
3696 /* Next, see if we can do a better job with a thumb2 replicated
3697 constant.
3699 We do it this way around to catch the cases like 0x01F001E0 where
3700 two 8-bit immediates would work, but a replicated constant would
3701 make it worse.
3703 TODO: 16-bit constants that don't clear all the bits, but still win.
3704 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3705 if (TARGET_THUMB2)
3707 b1 = (remainder & 0xff000000) >> 24;
3708 b2 = (remainder & 0x00ff0000) >> 16;
3709 b3 = (remainder & 0x0000ff00) >> 8;
3710 b4 = remainder & 0xff;
3712 if (loc > 24)
3714 /* The 8-bit immediate already found clears b1 (and maybe b2),
3715 but must leave b3 and b4 alone. */
3717 /* First try to find a 32-bit replicated constant that clears
3718 almost everything. We can assume that we can't do it in one,
3719 or else we wouldn't be here. */
3720 unsigned int tmp = b1 & b2 & b3 & b4;
3721 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3722 + (tmp << 24);
3723 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3724 + (tmp == b3) + (tmp == b4);
3725 if (tmp
3726 && (matching_bytes >= 3
3727 || (matching_bytes == 2
3728 && const_ok_for_op (remainder & ~tmp2, code))))
3730 /* At least 3 of the bytes match, and the fourth has at
3731 least as many bits set, or two of the bytes match
3732 and it will only require one more insn to finish. */
3733 result = tmp2;
3734 i = tmp != b1 ? 32
3735 : tmp != b2 ? 24
3736 : tmp != b3 ? 16
3737 : 8;
3740 /* Second, try to find a 16-bit replicated constant that can
3741 leave three of the bytes clear. If b2 or b4 is already
3742 zero, then we can. If the 8-bit from above would not
3743 clear b2 anyway, then we still win. */
3744 else if (b1 == b3 && (!b2 || !b4
3745 || (remainder & 0x00ff0000 & ~result)))
3747 result = remainder & 0xff00ff00;
3748 i = 24;
3751 else if (loc > 16)
3753 /* The 8-bit immediate already found clears b2 (and maybe b3)
3754 and we don't get here unless b1 is already clear, but it will
3755 leave b4 unchanged. */
3757 /* If we can clear b2 and b4 at once, then we win, since the
3758 8-bits couldn't possibly reach that far. */
3759 if (b2 == b4)
3761 result = remainder & 0x00ff00ff;
3762 i = 16;
3767 return_sequence->i[insns++] = result;
3768 remainder &= ~result;
3770 if (code == SET || code == MINUS)
3771 code = PLUS;
3773 while (remainder);
3775 return insns;
3778 /* Emit an instruction with the indicated PATTERN. If COND is
3779 non-NULL, conditionalize the execution of the instruction on COND
3780 being true. */
3782 static void
3783 emit_constant_insn (rtx cond, rtx pattern)
3785 if (cond)
3786 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3787 emit_insn (pattern);
3790 /* As above, but extra parameter GENERATE which, if clear, suppresses
3791 RTL generation. */
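/* As a sketch of the AND handling below: x &= 0xfffe0ffe cannot use a
   single immediate, but its complement 0x0001f001 splits into the two
   chunks 0x1f000 and 0x1, so the mask might be emitted roughly as
       bic   rD, rS, #0x1f000
       bic   rD, rD, #1  */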
3793 static int
3794 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3795 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3796 int generate)
3798 int can_invert = 0;
3799 int can_negate = 0;
3800 int final_invert = 0;
3801 int i;
3802 int set_sign_bit_copies = 0;
3803 int clear_sign_bit_copies = 0;
3804 int clear_zero_bit_copies = 0;
3805 int set_zero_bit_copies = 0;
3806 int insns = 0, neg_insns, inv_insns;
3807 unsigned HOST_WIDE_INT temp1, temp2;
3808 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3809 struct four_ints *immediates;
3810 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3812 /* Find out which operations are safe for a given CODE. Also do a quick
3813 check for degenerate cases; these can occur when DImode operations
3814 are split. */
3815 switch (code)
3817 case SET:
3818 can_invert = 1;
3819 break;
3821 case PLUS:
3822 can_negate = 1;
3823 break;
3825 case IOR:
3826 if (remainder == 0xffffffff)
3828 if (generate)
3829 emit_constant_insn (cond,
3830 gen_rtx_SET (VOIDmode, target,
3831 GEN_INT (ARM_SIGN_EXTEND (val))));
3832 return 1;
3835 if (remainder == 0)
3837 if (reload_completed && rtx_equal_p (target, source))
3838 return 0;
3840 if (generate)
3841 emit_constant_insn (cond,
3842 gen_rtx_SET (VOIDmode, target, source));
3843 return 1;
3845 break;
3847 case AND:
3848 if (remainder == 0)
3850 if (generate)
3851 emit_constant_insn (cond,
3852 gen_rtx_SET (VOIDmode, target, const0_rtx));
3853 return 1;
3855 if (remainder == 0xffffffff)
3857 if (reload_completed && rtx_equal_p (target, source))
3858 return 0;
3859 if (generate)
3860 emit_constant_insn (cond,
3861 gen_rtx_SET (VOIDmode, target, source));
3862 return 1;
3864 can_invert = 1;
3865 break;
3867 case XOR:
3868 if (remainder == 0)
3870 if (reload_completed && rtx_equal_p (target, source))
3871 return 0;
3872 if (generate)
3873 emit_constant_insn (cond,
3874 gen_rtx_SET (VOIDmode, target, source));
3875 return 1;
3878 if (remainder == 0xffffffff)
3880 if (generate)
3881 emit_constant_insn (cond,
3882 gen_rtx_SET (VOIDmode, target,
3883 gen_rtx_NOT (mode, source)));
3884 return 1;
3886 final_invert = 1;
3887 break;
3889 case MINUS:
3890 /* We treat MINUS as (val - source), since (source - val) is always
3891 passed as (source + (-val)). */
3892 if (remainder == 0)
3894 if (generate)
3895 emit_constant_insn (cond,
3896 gen_rtx_SET (VOIDmode, target,
3897 gen_rtx_NEG (mode, source)));
3898 return 1;
3900 if (const_ok_for_arm (val))
3902 if (generate)
3903 emit_constant_insn (cond,
3904 gen_rtx_SET (VOIDmode, target,
3905 gen_rtx_MINUS (mode, GEN_INT (val),
3906 source)));
3907 return 1;
3910 break;
3912 default:
3913 gcc_unreachable ();
3916 /* If we can do it in one insn get out quickly. */
3917 if (const_ok_for_op (val, code))
3919 if (generate)
3920 emit_constant_insn (cond,
3921 gen_rtx_SET (VOIDmode, target,
3922 (source
3923 ? gen_rtx_fmt_ee (code, mode, source,
3924 GEN_INT (val))
3925 : GEN_INT (val))));
3926 return 1;
3929 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3930 insn. */
3931 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3932 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3934 if (generate)
3936 if (mode == SImode && i == 16)
3937 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3938 smaller insn. */
3939 emit_constant_insn (cond,
3940 gen_zero_extendhisi2
3941 (target, gen_lowpart (HImode, source)));
3942 else
3943 /* Extz only supports SImode, but we can coerce the operands
3944 into that mode. */
3945 emit_constant_insn (cond,
3946 gen_extzv_t2 (gen_lowpart (SImode, target),
3947 gen_lowpart (SImode, source),
3948 GEN_INT (i), const0_rtx));
3951 return 1;
3954 /* Calculate a few attributes that may be useful for specific
3955 optimizations. */
3956 /* Count number of leading zeros. */
3957 for (i = 31; i >= 0; i--)
3959 if ((remainder & (1 << i)) == 0)
3960 clear_sign_bit_copies++;
3961 else
3962 break;
3965 /* Count number of leading 1's. */
3966 for (i = 31; i >= 0; i--)
3968 if ((remainder & (1 << i)) != 0)
3969 set_sign_bit_copies++;
3970 else
3971 break;
3974 /* Count number of trailing zeros. */
3975 for (i = 0; i <= 31; i++)
3977 if ((remainder & (1 << i)) == 0)
3978 clear_zero_bit_copies++;
3979 else
3980 break;
3983 /* Count number of trailing 1's. */
3984 for (i = 0; i <= 31; i++)
3986 if ((remainder & (1 << i)) != 0)
3987 set_zero_bit_copies++;
3988 else
3989 break;
3992 switch (code)
3994 case SET:
3995 /* See if we can do this by sign_extending a constant that is known
3996 to be negative. This is a good way of doing it, since the shift
3997 may well merge into a subsequent insn. */
3998 if (set_sign_bit_copies > 1)
4000 if (const_ok_for_arm
4001 (temp1 = ARM_SIGN_EXTEND (remainder
4002 << (set_sign_bit_copies - 1))))
4004 if (generate)
4006 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4007 emit_constant_insn (cond,
4008 gen_rtx_SET (VOIDmode, new_src,
4009 GEN_INT (temp1)));
4010 emit_constant_insn (cond,
4011 gen_ashrsi3 (target, new_src,
4012 GEN_INT (set_sign_bit_copies - 1)));
4014 return 2;
4016 /* For an inverted constant, we will need to set the low bits,
4017 these will be shifted out of harm's way. */
4018 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4019 if (const_ok_for_arm (~temp1))
4021 if (generate)
4023 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4024 emit_constant_insn (cond,
4025 gen_rtx_SET (VOIDmode, new_src,
4026 GEN_INT (temp1)));
4027 emit_constant_insn (cond,
4028 gen_ashrsi3 (target, new_src,
4029 GEN_INT (set_sign_bit_copies - 1)));
4031 return 2;
4035 /* See if we can calculate the value as the difference between two
4036 valid immediates. */
4037 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4039 int topshift = clear_sign_bit_copies & ~1;
4041 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4042 & (0xff000000 >> topshift));
4044 /* If temp1 is zero, then that means the 9 most significant
4045 bits of remainder were 1 and we've caused it to overflow.
4046 When topshift is 0 we don't need to do anything since we
4047 can borrow from 'bit 32'. */
4048 if (temp1 == 0 && topshift != 0)
4049 temp1 = 0x80000000 >> (topshift - 1);
4051 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4053 if (const_ok_for_arm (temp2))
4055 if (generate)
4057 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4058 emit_constant_insn (cond,
4059 gen_rtx_SET (VOIDmode, new_src,
4060 GEN_INT (temp1)));
4061 emit_constant_insn (cond,
4062 gen_addsi3 (target, new_src,
4063 GEN_INT (-temp2)));
4066 return 2;
4070 /* See if we can generate this by setting the bottom (or the top)
4071 16 bits, and then shifting these into the other half of the
4072 word. We only look for the simplest cases, to do more would cost
4073 too much. Be careful, however, not to generate this when the
4074 alternative would take fewer insns. */
4075 if (val & 0xffff0000)
4077 temp1 = remainder & 0xffff0000;
4078 temp2 = remainder & 0x0000ffff;
4080 /* Overlaps outside this range are best done using other methods. */
4081 for (i = 9; i < 24; i++)
4083 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4084 && !const_ok_for_arm (temp2))
4086 rtx new_src = (subtargets
4087 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4088 : target);
4089 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4090 source, subtargets, generate);
4091 source = new_src;
4092 if (generate)
4093 emit_constant_insn
4094 (cond,
4095 gen_rtx_SET
4096 (VOIDmode, target,
4097 gen_rtx_IOR (mode,
4098 gen_rtx_ASHIFT (mode, source,
4099 GEN_INT (i)),
4100 source)));
4101 return insns + 1;
4105 /* Don't duplicate cases already considered. */
4106 for (i = 17; i < 24; i++)
4108 if (((temp1 | (temp1 >> i)) == remainder)
4109 && !const_ok_for_arm (temp1))
4111 rtx new_src = (subtargets
4112 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4113 : target);
4114 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4115 source, subtargets, generate);
4116 source = new_src;
4117 if (generate)
4118 emit_constant_insn
4119 (cond,
4120 gen_rtx_SET (VOIDmode, target,
4121 gen_rtx_IOR
4122 (mode,
4123 gen_rtx_LSHIFTRT (mode, source,
4124 GEN_INT (i)),
4125 source)));
4126 return insns + 1;
4130 break;
4132 case IOR:
4133 case XOR:
4134 /* If we have IOR or XOR, and the constant can be loaded in a
4135 single instruction, and we can find a temporary to put it in,
4136 then this can be done in two instructions instead of 3-4. */
4137 if (subtargets
4138 /* TARGET can't be NULL if SUBTARGETS is 0 */
4139 || (reload_completed && !reg_mentioned_p (target, source)))
4141 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4143 if (generate)
4145 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4147 emit_constant_insn (cond,
4148 gen_rtx_SET (VOIDmode, sub,
4149 GEN_INT (val)));
4150 emit_constant_insn (cond,
4151 gen_rtx_SET (VOIDmode, target,
4152 gen_rtx_fmt_ee (code, mode,
4153 source, sub)));
4155 return 2;
4159 if (code == XOR)
4160 break;
4162 /* Convert.
4163 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4164 followed by 0s, e.g. 0xfff00000)
4165 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4167 This can be done in 2 instructions by using shifts with mov or mvn.
4168 e.g. for
4169 x = x | 0xfff00000;
4170 we generate.
4171 mvn r0, r0, asl #12
4172 mvn r0, r0, lsr #12 */
4173 if (set_sign_bit_copies > 8
4174 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4176 if (generate)
4178 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4179 rtx shift = GEN_INT (set_sign_bit_copies);
4181 emit_constant_insn
4182 (cond,
4183 gen_rtx_SET (VOIDmode, sub,
4184 gen_rtx_NOT (mode,
4185 gen_rtx_ASHIFT (mode,
4186 source,
4187 shift))));
4188 emit_constant_insn
4189 (cond,
4190 gen_rtx_SET (VOIDmode, target,
4191 gen_rtx_NOT (mode,
4192 gen_rtx_LSHIFTRT (mode, sub,
4193 shift))));
4195 return 2;
4198 /* Convert
4199 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4201 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4203 E.g. for r0 = r0 | 0xfff
4204 mvn r0, r0, lsr #12
4205 mvn r0, r0, asl #12
4208 if (set_zero_bit_copies > 8
4209 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4211 if (generate)
4213 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4214 rtx shift = GEN_INT (set_zero_bit_copies);
4216 emit_constant_insn
4217 (cond,
4218 gen_rtx_SET (VOIDmode, sub,
4219 gen_rtx_NOT (mode,
4220 gen_rtx_LSHIFTRT (mode,
4221 source,
4222 shift))));
4223 emit_constant_insn
4224 (cond,
4225 gen_rtx_SET (VOIDmode, target,
4226 gen_rtx_NOT (mode,
4227 gen_rtx_ASHIFT (mode, sub,
4228 shift))));
4230 return 2;
4233 /* This will never be reached for Thumb2 because orn is a valid
4234 instruction. This is for Thumb1 and the ARM 32 bit cases.
4236 x = y | constant (such that ~constant is a valid constant)
4237 Transform this to
4238 x = ~(~y & ~constant).
4240 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4242 if (generate)
4244 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4245 emit_constant_insn (cond,
4246 gen_rtx_SET (VOIDmode, sub,
4247 gen_rtx_NOT (mode, source)));
4248 source = sub;
4249 if (subtargets)
4250 sub = gen_reg_rtx (mode);
4251 emit_constant_insn (cond,
4252 gen_rtx_SET (VOIDmode, sub,
4253 gen_rtx_AND (mode, source,
4254 GEN_INT (temp1))));
4255 emit_constant_insn (cond,
4256 gen_rtx_SET (VOIDmode, target,
4257 gen_rtx_NOT (mode, sub)));
4259 return 3;
4261 break;
4263 case AND:
4264 /* See if two shifts will do 2 or more insn's worth of work. */
4265 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4267 HOST_WIDE_INT shift_mask = ((0xffffffff
4268 << (32 - clear_sign_bit_copies))
4269 & 0xffffffff);
4271 if ((remainder | shift_mask) != 0xffffffff)
4273 if (generate)
4275 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4276 insns = arm_gen_constant (AND, mode, cond,
4277 remainder | shift_mask,
4278 new_src, source, subtargets, 1);
4279 source = new_src;
4281 else
4283 rtx targ = subtargets ? NULL_RTX : target;
4284 insns = arm_gen_constant (AND, mode, cond,
4285 remainder | shift_mask,
4286 targ, source, subtargets, 0);
4290 if (generate)
4292 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4293 rtx shift = GEN_INT (clear_sign_bit_copies);
4295 emit_insn (gen_ashlsi3 (new_src, source, shift));
4296 emit_insn (gen_lshrsi3 (target, new_src, shift));
4299 return insns + 2;
4302 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4304 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4306 if ((remainder | shift_mask) != 0xffffffff)
4308 if (generate)
4310 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4312 insns = arm_gen_constant (AND, mode, cond,
4313 remainder | shift_mask,
4314 new_src, source, subtargets, 1);
4315 source = new_src;
4317 else
4319 rtx targ = subtargets ? NULL_RTX : target;
4321 insns = arm_gen_constant (AND, mode, cond,
4322 remainder | shift_mask,
4323 targ, source, subtargets, 0);
4327 if (generate)
4329 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4330 rtx shift = GEN_INT (clear_zero_bit_copies);
4332 emit_insn (gen_lshrsi3 (new_src, source, shift));
4333 emit_insn (gen_ashlsi3 (target, new_src, shift));
4336 return insns + 2;
4339 break;
4341 default:
4342 break;
4345 /* Calculate what the instruction sequences would be if we generated it
4346 normally, negated, or inverted. */
4347 if (code == AND)
4348 /* AND cannot be split into multiple insns, so invert and use BIC. */
4349 insns = 99;
4350 else
4351 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4353 if (can_negate)
4354 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4355 &neg_immediates);
4356 else
4357 neg_insns = 99;
4359 if (can_invert || final_invert)
4360 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4361 &inv_immediates);
4362 else
4363 inv_insns = 99;
4365 immediates = &pos_immediates;
4367 /* Is the negated immediate sequence more efficient? */
4368 if (neg_insns < insns && neg_insns <= inv_insns)
4370 insns = neg_insns;
4371 immediates = &neg_immediates;
4373 else
4374 can_negate = 0;
4376 /* Is the inverted immediate sequence more efficient?
4377 We must allow for an extra NOT instruction for XOR operations, although
4378 there is some chance that the final 'mvn' will get optimized later. */
4379 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4381 insns = inv_insns;
4382 immediates = &inv_immediates;
4384 else
4386 can_invert = 0;
4387 final_invert = 0;
4390 /* Now output the chosen sequence as instructions. */
4391 if (generate)
4393 for (i = 0; i < insns; i++)
4395 rtx new_src, temp1_rtx;
4397 temp1 = immediates->i[i];
4399 if (code == SET || code == MINUS)
4400 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4401 else if ((final_invert || i < (insns - 1)) && subtargets)
4402 new_src = gen_reg_rtx (mode);
4403 else
4404 new_src = target;
4406 if (can_invert)
4407 temp1 = ~temp1;
4408 else if (can_negate)
4409 temp1 = -temp1;
4411 temp1 = trunc_int_for_mode (temp1, mode);
4412 temp1_rtx = GEN_INT (temp1);
4414 if (code == SET)
4416 else if (code == MINUS)
4417 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4418 else
4419 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4421 emit_constant_insn (cond,
4422 gen_rtx_SET (VOIDmode, new_src,
4423 temp1_rtx));
4424 source = new_src;
4426 if (code == SET)
4428 can_negate = can_invert;
4429 can_invert = 0;
4430 code = PLUS;
4432 else if (code == MINUS)
4433 code = PLUS;
4437 if (final_invert)
4439 if (generate)
4440 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4441 gen_rtx_NOT (mode, source)));
4442 insns++;
4445 return insns;
4448 /* Canonicalize a comparison so that we are more likely to recognize it.
4449 This can be done for a few constant compares, where we can make the
4450 immediate value easier to load. */
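/* For example, a comparison such as (x > 0x00ffffff) uses a constant that
   no ARM immediate can encode, but adding one gives 0x01000000, which is
   encodable, so the test is rewritten as (x >= 0x01000000).  */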
4452 static void
4453 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4454 bool op0_preserve_value)
4456 enum machine_mode mode;
4457 unsigned HOST_WIDE_INT i, maxval;
4459 mode = GET_MODE (*op0);
4460 if (mode == VOIDmode)
4461 mode = GET_MODE (*op1);
4463 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4465 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4466 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4467 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4468 for GTU/LEU in Thumb mode. */
4469 if (mode == DImode)
4471 rtx tem;
4473 if (*code == GT || *code == LE
4474 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4476 /* Missing comparison. First try to use an available
4477 comparison. */
4478 if (CONST_INT_P (*op1))
4480 i = INTVAL (*op1);
4481 switch (*code)
4483 case GT:
4484 case LE:
4485 if (i != maxval
4486 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4488 *op1 = GEN_INT (i + 1);
4489 *code = *code == GT ? GE : LT;
4490 return;
4492 break;
4493 case GTU:
4494 case LEU:
4495 if (i != ~((unsigned HOST_WIDE_INT) 0)
4496 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4498 *op1 = GEN_INT (i + 1);
4499 *code = *code == GTU ? GEU : LTU;
4500 return;
4502 break;
4503 default:
4504 gcc_unreachable ();
4508 /* If that did not work, reverse the condition. */
4509 if (!op0_preserve_value)
4511 tem = *op0;
4512 *op0 = *op1;
4513 *op1 = tem;
4514 *code = (int)swap_condition ((enum rtx_code)*code);
4517 return;
4520 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4521 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4522 to facilitate possible combining with a cmp into 'ands'. */
4523 if (mode == SImode
4524 && GET_CODE (*op0) == ZERO_EXTEND
4525 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4526 && GET_MODE (XEXP (*op0, 0)) == QImode
4527 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4528 && subreg_lowpart_p (XEXP (*op0, 0))
4529 && *op1 == const0_rtx)
4530 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4531 GEN_INT (255));
4533 /* Comparisons smaller than DImode. Only adjust comparisons against
4534 an out-of-range constant. */
4535 if (!CONST_INT_P (*op1)
4536 || const_ok_for_arm (INTVAL (*op1))
4537 || const_ok_for_arm (- INTVAL (*op1)))
4538 return;
4540 i = INTVAL (*op1);
4542 switch (*code)
4544 case EQ:
4545 case NE:
4546 return;
4548 case GT:
4549 case LE:
4550 if (i != maxval
4551 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4553 *op1 = GEN_INT (i + 1);
4554 *code = *code == GT ? GE : LT;
4555 return;
4557 break;
4559 case GE:
4560 case LT:
4561 if (i != ~maxval
4562 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4564 *op1 = GEN_INT (i - 1);
4565 *code = *code == GE ? GT : LE;
4566 return;
4568 break;
4570 case GTU:
4571 case LEU:
4572 if (i != ~((unsigned HOST_WIDE_INT) 0)
4573 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4575 *op1 = GEN_INT (i + 1);
4576 *code = *code == GTU ? GEU : LTU;
4577 return;
4579 break;
4581 case GEU:
4582 case LTU:
4583 if (i != 0
4584 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4586 *op1 = GEN_INT (i - 1);
4587 *code = *code == GEU ? GTU : LEU;
4588 return;
4590 break;
4592 default:
4593 gcc_unreachable ();
4598 /* Define how to find the value returned by a function. */
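/* For instance, a result of type `short' is normally promoted to SImode
   and returned in r0, while under the VFP hard-float PCS a `double'
   result typically comes back in d0 instead.  */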
4600 static rtx
4601 arm_function_value(const_tree type, const_tree func,
4602 bool outgoing ATTRIBUTE_UNUSED)
4604 enum machine_mode mode;
4605 int unsignedp ATTRIBUTE_UNUSED;
4606 rtx r ATTRIBUTE_UNUSED;
4608 mode = TYPE_MODE (type);
4610 if (TARGET_AAPCS_BASED)
4611 return aapcs_allocate_return_reg (mode, type, func);
4613 /* Promote integer types. */
4614 if (INTEGRAL_TYPE_P (type))
4615 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4617 /* Promotes small structs returned in a register to full-word size
4618 for big-endian AAPCS. */
4619 if (arm_return_in_msb (type))
4621 HOST_WIDE_INT size = int_size_in_bytes (type);
4622 if (size % UNITS_PER_WORD != 0)
4624 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4625 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4629 return arm_libcall_value_1 (mode);
4632 /* libcall hashtable helpers. */
4634 struct libcall_hasher : typed_noop_remove <rtx_def>
4636 typedef rtx_def value_type;
4637 typedef rtx_def compare_type;
4638 static inline hashval_t hash (const value_type *);
4639 static inline bool equal (const value_type *, const compare_type *);
4640 static inline void remove (value_type *);
4643 inline bool
4644 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4646 return rtx_equal_p (p1, p2);
4649 inline hashval_t
4650 libcall_hasher::hash (const value_type *p1)
4652 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4655 typedef hash_table <libcall_hasher> libcall_table_type;
4657 static void
4658 add_libcall (libcall_table_type htab, rtx libcall)
4660 *htab.find_slot (libcall, INSERT) = libcall;
4663 static bool
4664 arm_libcall_uses_aapcs_base (const_rtx libcall)
4666 static bool init_done = false;
4667 static libcall_table_type libcall_htab;
4669 if (!init_done)
4671 init_done = true;
4673 libcall_htab.create (31);
4674 add_libcall (libcall_htab,
4675 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4676 add_libcall (libcall_htab,
4677 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4678 add_libcall (libcall_htab,
4679 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4680 add_libcall (libcall_htab,
4681 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4683 add_libcall (libcall_htab,
4684 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4685 add_libcall (libcall_htab,
4686 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4687 add_libcall (libcall_htab,
4688 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4689 add_libcall (libcall_htab,
4690 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4692 add_libcall (libcall_htab,
4693 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4694 add_libcall (libcall_htab,
4695 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4696 add_libcall (libcall_htab,
4697 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4698 add_libcall (libcall_htab,
4699 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4700 add_libcall (libcall_htab,
4701 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4702 add_libcall (libcall_htab,
4703 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4704 add_libcall (libcall_htab,
4705 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4706 add_libcall (libcall_htab,
4707 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4709 /* Values from double-precision helper functions are returned in core
4710 registers if the selected core only supports single-precision
4711 arithmetic, even if we are using the hard-float ABI. The same is
4712 true for single-precision helpers, but we will never be using the
4713 hard-float ABI on a CPU which doesn't support single-precision
4714 operations in hardware. */
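  /* On AAPCS-based targets the double-precision helpers registered below
     are typically the __aeabi_* routines (e.g. __aeabi_dadd), whose
     results come back in r0/r1 rather than in VFP registers.  */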
4715 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4716 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4717 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4718 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4719 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4720 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4721 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4722 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4723 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4724 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4725 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4726 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4727 SFmode));
4728 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4729 DFmode));
4732 return libcall && libcall_htab.find (libcall) != NULL;
4735 static rtx
4736 arm_libcall_value_1 (enum machine_mode mode)
4738 if (TARGET_AAPCS_BASED)
4739 return aapcs_libcall_value (mode);
4740 else if (TARGET_IWMMXT_ABI
4741 && arm_vector_mode_supported_p (mode))
4742 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4743 else
4744 return gen_rtx_REG (mode, ARG_REGISTER (1));
4747 /* Define how to find the value returned by a library function
4748 assuming the value has mode MODE. */
4750 static rtx
4751 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4753 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4754 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4756 /* The following libcalls return their result in integer registers,
4757 even though they return a floating point value. */
4758 if (arm_libcall_uses_aapcs_base (libcall))
4759 return gen_rtx_REG (mode, ARG_REGISTER(1));
4763 return arm_libcall_value_1 (mode);
4766 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4768 static bool
4769 arm_function_value_regno_p (const unsigned int regno)
4771 if (regno == ARG_REGISTER (1)
4772 || (TARGET_32BIT
4773 && TARGET_AAPCS_BASED
4774 && TARGET_VFP
4775 && TARGET_HARD_FLOAT
4776 && regno == FIRST_VFP_REGNUM)
4777 || (TARGET_IWMMXT_ABI
4778 && regno == FIRST_IWMMXT_REGNUM))
4779 return true;
4781 return false;
4784 /* Determine the amount of memory needed to store the possible return
4785 registers of an untyped call. */
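/* The baseline below is 16 bytes for r0-r3; a hard-float VFP target adds
   32 bytes of VFP return registers and an iWMMXt ABI adds a further 8,
   giving e.g. 48 bytes for a typical hard-float build.  */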
4787 arm_apply_result_size (void)
4789 int size = 16;
4791 if (TARGET_32BIT)
4793 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4794 size += 32;
4795 if (TARGET_IWMMXT_ABI)
4796 size += 8;
4799 return size;
4802 /* Decide whether TYPE should be returned in memory (true)
4803 or in a register (false). FNTYPE is the type of the function making
4804 the call. */
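/* Roughly: under AAPCS a struct containing a single int is returned in r0,
   while a struct larger than a word normally goes back in memory unless a
   co-processor (e.g. VFP, for homogeneous floating-point aggregates) can
   return it in registers.  */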
4805 static bool
4806 arm_return_in_memory (const_tree type, const_tree fntype)
4808 HOST_WIDE_INT size;
4810 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4812 if (TARGET_AAPCS_BASED)
4814 /* Simple, non-aggregate types (i.e. not including vectors and
4815 complex) are always returned in a register (or registers).
4816 We don't care about which register here, so we can short-cut
4817 some of the detail. */
4818 if (!AGGREGATE_TYPE_P (type)
4819 && TREE_CODE (type) != VECTOR_TYPE
4820 && TREE_CODE (type) != COMPLEX_TYPE)
4821 return false;
4823 /* Any return value that is no larger than one word can be
4824 returned in r0. */
4825 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4826 return false;
4828 /* Check any available co-processors to see if they accept the
4829 type as a register candidate (VFP, for example, can return
4830 some aggregates in consecutive registers). These aren't
4831 available if the call is variadic. */
4832 if (aapcs_select_return_coproc (type, fntype) >= 0)
4833 return false;
4835 /* Vector values should be returned using ARM registers, not
4836 memory (unless they're over 16 bytes, which will break since
4837 we only have four call-clobbered registers to play with). */
4838 if (TREE_CODE (type) == VECTOR_TYPE)
4839 return (size < 0 || size > (4 * UNITS_PER_WORD));
4841 /* The rest go in memory. */
4842 return true;
4845 if (TREE_CODE (type) == VECTOR_TYPE)
4846 return (size < 0 || size > (4 * UNITS_PER_WORD));
4848 if (!AGGREGATE_TYPE_P (type) &&
4849 (TREE_CODE (type) != VECTOR_TYPE))
4850 /* All simple types are returned in registers. */
4851 return false;
4853 if (arm_abi != ARM_ABI_APCS)
4855 /* ATPCS and later return aggregate types in memory only if they are
4856 larger than a word (or are variable size). */
4857 return (size < 0 || size > UNITS_PER_WORD);
4860 /* For the arm-wince targets we choose to be compatible with Microsoft's
4861 ARM and Thumb compilers, which always return aggregates in memory. */
4862 #ifndef ARM_WINCE
4863 /* All structures/unions bigger than one word are returned in memory.
4864 Also catch the case where int_size_in_bytes returns -1. In this case
4865 the aggregate is either huge or of variable size, and in either case
4866 we will want to return it via memory and not in a register. */
4867 if (size < 0 || size > UNITS_PER_WORD)
4868 return true;
4870 if (TREE_CODE (type) == RECORD_TYPE)
4872 tree field;
4874 /* For a struct the APCS says that we only return in a register
4875 if the type is 'integer like' and every addressable element
4876 has an offset of zero. For practical purposes this means
4877 that the structure can have at most one non bit-field element
4878 and that this element must be the first one in the structure. */
4880 /* Find the first field, ignoring non FIELD_DECL things which will
4881 have been created by C++. */
4882 for (field = TYPE_FIELDS (type);
4883 field && TREE_CODE (field) != FIELD_DECL;
4884 field = DECL_CHAIN (field))
4885 continue;
4887 if (field == NULL)
4888 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4890 /* Check that the first field is valid for returning in a register. */
4892 /* ... Floats are not allowed */
4893 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4894 return true;
4896 /* ... Aggregates that are not themselves valid for returning in
4897 a register are not allowed. */
4898 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4899 return true;
4901 /* Now check the remaining fields, if any. Only bitfields are allowed,
4902 since they are not addressable. */
4903 for (field = DECL_CHAIN (field);
4904 field;
4905 field = DECL_CHAIN (field))
4907 if (TREE_CODE (field) != FIELD_DECL)
4908 continue;
4910 if (!DECL_BIT_FIELD_TYPE (field))
4911 return true;
4914 return false;
4917 if (TREE_CODE (type) == UNION_TYPE)
4919 tree field;
4921 /* Unions can be returned in registers if every element is
4922 integral, or can be returned in an integer register. */
4923 for (field = TYPE_FIELDS (type);
4924 field;
4925 field = DECL_CHAIN (field))
4927 if (TREE_CODE (field) != FIELD_DECL)
4928 continue;
4930 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4931 return true;
4933 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4934 return true;
4937 return false;
4939 #endif /* not ARM_WINCE */
4941 /* Return all other types in memory. */
4942 return true;
4945 const struct pcs_attribute_arg
4947 const char *arg;
4948 enum arm_pcs value;
4949 } pcs_attribute_args[] =
4951 {"aapcs", ARM_PCS_AAPCS},
4952 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4953 #if 0
4954 /* We could recognize these, but changes would be needed elsewhere
4955 * to implement them. */
4956 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4957 {"atpcs", ARM_PCS_ATPCS},
4958 {"apcs", ARM_PCS_APCS},
4959 #endif
4960 {NULL, ARM_PCS_UNKNOWN}
4963 static enum arm_pcs
4964 arm_pcs_from_attribute (tree attr)
4966 const struct pcs_attribute_arg *ptr;
4967 const char *arg;
4969 /* Get the value of the argument. */
4970 if (TREE_VALUE (attr) == NULL_TREE
4971 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4972 return ARM_PCS_UNKNOWN;
4974 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4976 /* Check it against the list of known arguments. */
4977 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4978 if (streq (arg, ptr->arg))
4979 return ptr->value;
4981 /* An unrecognized PCS attribute argument. */
4982 return ARM_PCS_UNKNOWN;
4985 /* Get the PCS variant to use for this call. TYPE is the function's type
4986 specification; DECL is the specific declaration. DECL may be null if
4987 the call could be indirect or if this is a library call. */
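/* For instance, a declaration along the lines of
       double f (double) __attribute__ ((pcs ("aapcs")));
   forces the base variant for that function even when the target default
   is the VFP variant.  */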
4988 static enum arm_pcs
4989 arm_get_pcs_model (const_tree type, const_tree decl)
4991 bool user_convention = false;
4992 enum arm_pcs user_pcs = arm_pcs_default;
4993 tree attr;
4995 gcc_assert (type);
4997 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4998 if (attr)
5000 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5001 user_convention = true;
5004 if (TARGET_AAPCS_BASED)
5006 /* Detect varargs functions. These always use the base rules
5007 (no argument is ever a candidate for a co-processor
5008 register). */
5009 bool base_rules = stdarg_p (type);
5011 if (user_convention)
5013 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5014 sorry ("non-AAPCS derived PCS variant");
5015 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5016 error ("variadic functions must use the base AAPCS variant");
5019 if (base_rules)
5020 return ARM_PCS_AAPCS;
5021 else if (user_convention)
5022 return user_pcs;
5023 else if (decl && flag_unit_at_a_time)
5025 /* Local functions never leak outside this compilation unit,
5026 so we are free to use whatever conventions are
5027 appropriate. */
5028 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5029 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5030 if (i && i->local)
5031 return ARM_PCS_AAPCS_LOCAL;
5034 else if (user_convention && user_pcs != arm_pcs_default)
5035 sorry ("PCS variant");
5037 /* For everything else we use the target's default. */
5038 return arm_pcs_default;
5042 static void
5043 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5044 const_tree fntype ATTRIBUTE_UNUSED,
5045 rtx libcall ATTRIBUTE_UNUSED,
5046 const_tree fndecl ATTRIBUTE_UNUSED)
5048 /* Record the unallocated VFP registers. */
5049 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5050 pcum->aapcs_vfp_reg_alloc = 0;
5053 /* Walk down the type tree of TYPE counting consecutive base elements.
5054 If *MODEP is VOIDmode, then set it to the first valid floating point
5055 type. If a non-floating point type is found, or if a floating point
5056 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5057 otherwise return the count in the sub-tree. */
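/* For example, struct { float x, y, z; } yields a count of 3 with *MODEP
   set to SFmode, _Complex double yields 2 with DFmode, and a struct mixing
   float and double members yields -1.  */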
5058 static int
5059 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5061 enum machine_mode mode;
5062 HOST_WIDE_INT size;
5064 switch (TREE_CODE (type))
5066 case REAL_TYPE:
5067 mode = TYPE_MODE (type);
5068 if (mode != DFmode && mode != SFmode)
5069 return -1;
5071 if (*modep == VOIDmode)
5072 *modep = mode;
5074 if (*modep == mode)
5075 return 1;
5077 break;
5079 case COMPLEX_TYPE:
5080 mode = TYPE_MODE (TREE_TYPE (type));
5081 if (mode != DFmode && mode != SFmode)
5082 return -1;
5084 if (*modep == VOIDmode)
5085 *modep = mode;
5087 if (*modep == mode)
5088 return 2;
5090 break;
5092 case VECTOR_TYPE:
5093 /* Use V2SImode and V4SImode as representatives of all 64-bit
5094 and 128-bit vector types, whether or not those modes are
5095 supported with the present options. */
5096 size = int_size_in_bytes (type);
5097 switch (size)
5099 case 8:
5100 mode = V2SImode;
5101 break;
5102 case 16:
5103 mode = V4SImode;
5104 break;
5105 default:
5106 return -1;
5109 if (*modep == VOIDmode)
5110 *modep = mode;
5112 /* Vector modes are considered to be opaque: two vectors are
5113 equivalent for the purposes of being homogeneous aggregates
5114 if they are the same size. */
5115 if (*modep == mode)
5116 return 1;
5118 break;
5120 case ARRAY_TYPE:
5122 int count;
5123 tree index = TYPE_DOMAIN (type);
5125 /* Can't handle incomplete types nor sizes that are not
5126 fixed. */
5127 if (!COMPLETE_TYPE_P (type)
5128 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5129 return -1;
5131 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5132 if (count == -1
5133 || !index
5134 || !TYPE_MAX_VALUE (index)
5135 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5136 || !TYPE_MIN_VALUE (index)
5137 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5138 || count < 0)
5139 return -1;
5141 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5142 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5144 /* There must be no padding. */
5145 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5146 return -1;
5148 return count;
5151 case RECORD_TYPE:
5153 int count = 0;
5154 int sub_count;
5155 tree field;
5157 /* Can't handle incomplete types nor sizes that are not
5158 fixed. */
5159 if (!COMPLETE_TYPE_P (type)
5160 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5161 return -1;
5163 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5165 if (TREE_CODE (field) != FIELD_DECL)
5166 continue;
5168 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5169 if (sub_count < 0)
5170 return -1;
5171 count += sub_count;
5174 /* There must be no padding. */
5175 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5176 return -1;
5178 return count;
5181 case UNION_TYPE:
5182 case QUAL_UNION_TYPE:
5184 /* These aren't very interesting except in a degenerate case. */
5185 int count = 0;
5186 int sub_count;
5187 tree field;
5189 /* Can't handle incomplete types nor sizes that are not
5190 fixed. */
5191 if (!COMPLETE_TYPE_P (type)
5192 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5193 return -1;
5195 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5197 if (TREE_CODE (field) != FIELD_DECL)
5198 continue;
5200 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5201 if (sub_count < 0)
5202 return -1;
5203 count = count > sub_count ? count : sub_count;
5206 /* There must be no padding. */
5207 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5208 return -1;
5210 return count;
5213 default:
5214 break;
5217 return -1;
5220 /* Return true if PCS_VARIANT should use VFP registers. */
5221 static bool
5222 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5224 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5226 static bool seen_thumb1_vfp = false;
5228 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5230 sorry ("Thumb-1 hard-float VFP ABI");
5231 /* sorry() is not immediately fatal, so only display this once. */
5232 seen_thumb1_vfp = true;
5235 return true;
5238 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5239 return false;
5241 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5242 (TARGET_VFP_DOUBLE || !is_double));
5245 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5246 suitable for passing or returning in VFP registers for the PCS
5247 variant selected. If it is, then *BASE_MODE is updated to contain
5248 a machine mode describing each element of the argument's type and
5249 *COUNT to hold the number of such elements. */
5250 static bool
5251 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5252 enum machine_mode mode, const_tree type,
5253 enum machine_mode *base_mode, int *count)
5255 enum machine_mode new_mode = VOIDmode;
5257 /* If we have the type information, prefer that to working things
5258 out from the mode. */
5259 if (type)
5261 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5263 if (ag_count > 0 && ag_count <= 4)
5264 *count = ag_count;
5265 else
5266 return false;
5268 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5269 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5270 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5272 *count = 1;
5273 new_mode = mode;
5275 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5277 *count = 2;
5278 new_mode = (mode == DCmode ? DFmode : SFmode);
5280 else
5281 return false;
5284 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5285 return false;
5287 *base_mode = new_mode;
5288 return true;
5291 static bool
5292 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5293 enum machine_mode mode, const_tree type)
5295 int count ATTRIBUTE_UNUSED;
5296 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5298 if (!use_vfp_abi (pcs_variant, false))
5299 return false;
5300 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5301 &ag_mode, &count);
5304 static bool
5305 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5306 const_tree type)
5308 if (!use_vfp_abi (pcum->pcs_variant, false))
5309 return false;
5311 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5312 &pcum->aapcs_vfp_rmode,
5313 &pcum->aapcs_vfp_rcount);
5316 static bool
5317 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5318 const_tree type ATTRIBUTE_UNUSED)
5320 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5321 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5322 int regno;
5324 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5325 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5327 pcum->aapcs_vfp_reg_alloc = mask << regno;
5328 if (mode == BLKmode
5329 || (mode == TImode && ! TARGET_NEON)
5330 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5332 int i;
5333 int rcount = pcum->aapcs_vfp_rcount;
5334 int rshift = shift;
5335 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5336 rtx par;
5337 if (!TARGET_NEON)
5339 /* Avoid using unsupported vector modes. */
5340 if (rmode == V2SImode)
5341 rmode = DImode;
5342 else if (rmode == V4SImode)
5344 rmode = DImode;
5345 rcount *= 2;
5346 rshift /= 2;
5349 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5350 for (i = 0; i < rcount; i++)
5352 rtx tmp = gen_rtx_REG (rmode,
5353 FIRST_VFP_REGNUM + regno + i * rshift);
5354 tmp = gen_rtx_EXPR_LIST
5355 (VOIDmode, tmp,
5356 GEN_INT (i * GET_MODE_SIZE (rmode)));
5357 XVECEXP (par, 0, i) = tmp;
5360 pcum->aapcs_reg = par;
5362 else
5363 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5364 return true;
5366 return false;
5369 static rtx
5370 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5371 enum machine_mode mode,
5372 const_tree type ATTRIBUTE_UNUSED)
5374 if (!use_vfp_abi (pcs_variant, false))
5375 return NULL;
5377 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5379 int count;
5380 enum machine_mode ag_mode;
5381 int i;
5382 rtx par;
5383 int shift;
5385 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5386 &ag_mode, &count);
5388 if (!TARGET_NEON)
5390 if (ag_mode == V2SImode)
5391 ag_mode = DImode;
5392 else if (ag_mode == V4SImode)
5394 ag_mode = DImode;
5395 count *= 2;
5398 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5399 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5400 for (i = 0; i < count; i++)
5402 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5403 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5404 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5405 XVECEXP (par, 0, i) = tmp;
5408 return par;
5411 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5414 static void
5415 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5416 enum machine_mode mode ATTRIBUTE_UNUSED,
5417 const_tree type ATTRIBUTE_UNUSED)
5419 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5420 pcum->aapcs_vfp_reg_alloc = 0;
5421 return;
5424 #define AAPCS_CP(X) \
5426 aapcs_ ## X ## _cum_init, \
5427 aapcs_ ## X ## _is_call_candidate, \
5428 aapcs_ ## X ## _allocate, \
5429 aapcs_ ## X ## _is_return_candidate, \
5430 aapcs_ ## X ## _allocate_return_reg, \
5431 aapcs_ ## X ## _advance \
5434 /* Table of co-processors that can be used to pass arguments in
5435 registers. Ideally no argument should be a candidate for more than
5436 one co-processor table entry, but the table is processed in order
5437 and stops after the first match. If that entry then fails to put
5438 the argument into a co-processor register, the argument will go on
5439 the stack. */
5440 static struct
5442 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5443 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5445 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5446 BLKmode) is a candidate for this co-processor's registers; this
5447 function should ignore any position-dependent state in
5448 CUMULATIVE_ARGS and only use call-type dependent information. */
5449 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5451 /* Return true if the argument does get a co-processor register; it
5452 should set aapcs_reg to an RTX of the register allocated as is
5453 required for a return from FUNCTION_ARG. */
5454 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5456 /* Return true if a result of mode MODE (or type TYPE if MODE is
5457 BLKmode) can be returned in this co-processor's registers. */
5458 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5460 /* Allocate and return an RTX element to hold the return type of a
5461 call, this routine must not fail and will only be called if
5462 is_return_candidate returned true with the same parameters. */
5463 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5465 /* Finish processing this argument and prepare to start processing
5466 the next one. */
5467 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5468 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5470 AAPCS_CP(vfp)
5473 #undef AAPCS_CP
5475 static int
5476 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5477 const_tree type)
5479 int i;
5481 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5482 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5483 return i;
5485 return -1;
5488 static int
5489 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5491 /* We aren't passed a decl, so we can't check that a call is local.
5492 However, it isn't clear that that would be a win anyway, since it
5493 might limit some tail-calling opportunities. */
5494 enum arm_pcs pcs_variant;
5496 if (fntype)
5498 const_tree fndecl = NULL_TREE;
5500 if (TREE_CODE (fntype) == FUNCTION_DECL)
5502 fndecl = fntype;
5503 fntype = TREE_TYPE (fntype);
5506 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5508 else
5509 pcs_variant = arm_pcs_default;
5511 if (pcs_variant != ARM_PCS_AAPCS)
5513 int i;
5515 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5516 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5517 TYPE_MODE (type),
5518 type))
5519 return i;
5521 return -1;
5524 static rtx
5525 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5526 const_tree fntype)
5528 /* We aren't passed a decl, so we can't check that a call is local.
5529 However, it isn't clear that that would be a win anyway, since it
5530 might limit some tail-calling opportunities. */
5531 enum arm_pcs pcs_variant;
5532 int unsignedp ATTRIBUTE_UNUSED;
5534 if (fntype)
5536 const_tree fndecl = NULL_TREE;
5538 if (TREE_CODE (fntype) == FUNCTION_DECL)
5540 fndecl = fntype;
5541 fntype = TREE_TYPE (fntype);
5544 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5546 else
5547 pcs_variant = arm_pcs_default;
5549 /* Promote integer types. */
5550 if (type && INTEGRAL_TYPE_P (type))
5551 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5553 if (pcs_variant != ARM_PCS_AAPCS)
5555 int i;
5557 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5558 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5559 type))
5560 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5561 mode, type);
5564 /* Promote small structs returned in a register to full-word size
5565 for big-endian AAPCS. */
5566 if (type && arm_return_in_msb (type))
5568 HOST_WIDE_INT size = int_size_in_bytes (type);
5569 if (size % UNITS_PER_WORD != 0)
5571 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5572 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5576 return gen_rtx_REG (mode, R0_REGNUM);
5579 static rtx
5580 aapcs_libcall_value (enum machine_mode mode)
5582 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5583 && GET_MODE_SIZE (mode) <= 4)
5584 mode = SImode;
5586 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5589 /* Lay out a function argument using the AAPCS rules. The rule
5590 numbers referred to here are those in the AAPCS. */
5591 static void
5592 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5593 const_tree type, bool named)
5595 int nregs, nregs2;
5596 int ncrn;
5598 /* We only need to do this once per argument. */
5599 if (pcum->aapcs_arg_processed)
5600 return;
5602 pcum->aapcs_arg_processed = true;
5604 /* Special case: if named is false then we are handling an incoming
5605 anonymous argument which is on the stack. */
5606 if (!named)
5607 return;
5609 /* Is this a potential co-processor register candidate? */
5610 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5612 int slot = aapcs_select_call_coproc (pcum, mode, type);
5613 pcum->aapcs_cprc_slot = slot;
5615 /* We don't have to apply any of the rules from part B of the
5616 preparation phase, these are handled elsewhere in the
5617 compiler. */
5619 if (slot >= 0)
5621 /* A Co-processor register candidate goes either in its own
5622 class of registers or on the stack. */
5623 if (!pcum->aapcs_cprc_failed[slot])
5625 /* C1.cp - Try to allocate the argument to co-processor
5626 registers. */
5627 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5628 return;
5630 /* C2.cp - Put the argument on the stack and note that we
5631 can't assign any more candidates in this slot. We also
5632 need to note that we have allocated stack space, so that
5633 we won't later try to split a non-cprc candidate between
5634 core registers and the stack. */
5635 pcum->aapcs_cprc_failed[slot] = true;
5636 pcum->can_split = false;
5639 /* We didn't get a register, so this argument goes on the
5640 stack. */
5641 gcc_assert (pcum->can_split == false);
5642 return;
5646 /* C3 - For double-word aligned arguments, round the NCRN up to the
5647 next even number. */
5648 ncrn = pcum->aapcs_ncrn;
5649 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5650 ncrn++;
5652 nregs = ARM_NUM_REGS2(mode, type);
5654 /* Sigh, this test should really assert that nregs > 0, but a GCC
5655 extension allows empty structs and then gives them zero size; it
5656 then allows such a structure to be passed by value. For some of
5657 the code below we have to pretend that such an argument has
5658 non-zero size so that we 'locate' it correctly either in
5659 registers or on the stack. */
5660 gcc_assert (nregs >= 0);
5662 nregs2 = nregs ? nregs : 1;
5664 /* C4 - Argument fits entirely in core registers. */
5665 if (ncrn + nregs2 <= NUM_ARG_REGS)
5667 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5668 pcum->aapcs_next_ncrn = ncrn + nregs;
5669 return;
5672 /* C5 - Some core registers left and there are no arguments already
5673 on the stack: split this argument between the remaining core
5674 registers and the stack. */
5675 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5677 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5678 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5679 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5680 return;
5683 /* C6 - NCRN is set to 4. */
5684 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5686 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5687 return;
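/* For illustration only (not part of the original source; assumes the
   base, soft-float AAPCS variant, so the double is not diverted to a VFP
   register by the co-processor path above): for a call such as

     void f (int a, double d, int b);

   rule C4 puts A in r0; rule C3 then rounds the NCRN up from 1 to 2 so
   that the doubleword-aligned D occupies the even pair r2/r3; B no longer
   fits in core registers, so rules C6-C8 place it on the stack.  */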
5690 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5691 for a call to a function whose data type is FNTYPE.
5692 For a library call, FNTYPE is NULL. */
5693 void
5694 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5695 rtx libname,
5696 tree fndecl ATTRIBUTE_UNUSED)
5698 /* Long call handling. */
5699 if (fntype)
5700 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5701 else
5702 pcum->pcs_variant = arm_pcs_default;
5704 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5706 if (arm_libcall_uses_aapcs_base (libname))
5707 pcum->pcs_variant = ARM_PCS_AAPCS;
5709 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5710 pcum->aapcs_reg = NULL_RTX;
5711 pcum->aapcs_partial = 0;
5712 pcum->aapcs_arg_processed = false;
5713 pcum->aapcs_cprc_slot = -1;
5714 pcum->can_split = true;
5716 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5718 int i;
5720 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5722 pcum->aapcs_cprc_failed[i] = false;
5723 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5726 return;
5729 /* Legacy ABIs */
5731 /* On the ARM, the offset starts at 0. */
5732 pcum->nregs = 0;
5733 pcum->iwmmxt_nregs = 0;
5734 pcum->can_split = true;
5736 /* Varargs vectors are treated the same as long long.
5737 named_count avoids having to change the way arm handles 'named'. */
5738 pcum->named_count = 0;
5739 pcum->nargs = 0;
5741 if (TARGET_REALLY_IWMMXT && fntype)
5743 tree fn_arg;
5745 for (fn_arg = TYPE_ARG_TYPES (fntype);
5746 fn_arg;
5747 fn_arg = TREE_CHAIN (fn_arg))
5748 pcum->named_count += 1;
5750 if (! pcum->named_count)
5751 pcum->named_count = INT_MAX;
5755 /* Return true if we use LRA instead of reload pass. */
5756 static bool
5757 arm_lra_p (void)
5759 return arm_lra_flag;
5762 /* Return true if mode/type need doubleword alignment. */
5763 static bool
5764 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5766 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5767 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5771 /* Determine where to put an argument to a function.
5772 Value is zero to push the argument on the stack,
5773 or a hard register in which to store the argument.
5775 MODE is the argument's machine mode.
5776 TYPE is the data type of the argument (as a tree).
5777 This is null for libcalls where that information may
5778 not be available.
5779 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5780 the preceding args and about the function being called.
5781 NAMED is nonzero if this argument is a named parameter
5782 (otherwise it is an extra parameter matching an ellipsis).
5784 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5785 other arguments are passed on the stack. If (NAMED == 0) (which happens
5786 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5787 defined), say it is passed on the stack (function_prologue will
5788 indeed put it on the stack if necessary). */
5790 static rtx
5791 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5792 const_tree type, bool named)
5794 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5795 int nregs;
5797 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5798 a call insn (op3 of a call_value insn). */
5799 if (mode == VOIDmode)
5800 return const0_rtx;
5802 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5804 aapcs_layout_arg (pcum, mode, type, named);
5805 return pcum->aapcs_reg;
5808 /* Varargs vectors are treated the same as long long.
5809 named_count avoids having to change the way arm handles 'named'. */
5810 if (TARGET_IWMMXT_ABI
5811 && arm_vector_mode_supported_p (mode)
5812 && pcum->named_count > pcum->nargs + 1)
5814 if (pcum->iwmmxt_nregs <= 9)
5815 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5816 else
5818 pcum->can_split = false;
5819 return NULL_RTX;
5823 /* Put doubleword aligned quantities in even register pairs. */
5824 if (pcum->nregs & 1
5825 && ARM_DOUBLEWORD_ALIGN
5826 && arm_needs_doubleword_align (mode, type))
5827 pcum->nregs++;
5829 /* Only allow splitting an arg between regs and memory if all preceding
5830 args were allocated to regs. For args passed by reference we only count
5831 the reference pointer. */
5832 if (pcum->can_split)
5833 nregs = 1;
5834 else
5835 nregs = ARM_NUM_REGS2 (mode, type);
5837 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5838 return NULL_RTX;
5840 return gen_rtx_REG (mode, pcum->nregs);
5843 static unsigned int
5844 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5846 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5847 ? DOUBLEWORD_ALIGNMENT
5848 : PARM_BOUNDARY);
5851 static int
5852 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5853 tree type, bool named)
5855 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5856 int nregs = pcum->nregs;
5858 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5860 aapcs_layout_arg (pcum, mode, type, named);
5861 return pcum->aapcs_partial;
5864 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5865 return 0;
5867 if (NUM_ARG_REGS > nregs
5868 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5869 && pcum->can_split)
5870 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5872 return 0;
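/* A hypothetical example of the split case handled above (legacy/base ABI,
   argument passed by value, not taken from the source): with three ints
   already occupying r0-r2, an 8-byte struct starts at r3.  Only
   (NUM_ARG_REGS - nregs) * UNITS_PER_WORD = 4 bytes fit in registers, so
   this hook reports 4 partial bytes and the remaining 4 bytes of the
   struct go on the stack.  */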
5875 /* Update the data in PCUM to advance over an argument
5876 of mode MODE and data type TYPE.
5877 (TYPE is null for libcalls where that information may not be available.) */
5879 static void
5880 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5881 const_tree type, bool named)
5883 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5885 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5887 aapcs_layout_arg (pcum, mode, type, named);
5889 if (pcum->aapcs_cprc_slot >= 0)
5891 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5892 type);
5893 pcum->aapcs_cprc_slot = -1;
5896 /* Generic stuff. */
5897 pcum->aapcs_arg_processed = false;
5898 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5899 pcum->aapcs_reg = NULL_RTX;
5900 pcum->aapcs_partial = 0;
5902 else
5904 pcum->nargs += 1;
5905 if (arm_vector_mode_supported_p (mode)
5906 && pcum->named_count > pcum->nargs
5907 && TARGET_IWMMXT_ABI)
5908 pcum->iwmmxt_nregs += 1;
5909 else
5910 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5914 /* Variable sized types are passed by reference. This is a GCC
5915 extension to the ARM ABI. */
5917 static bool
5918 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5919 enum machine_mode mode ATTRIBUTE_UNUSED,
5920 const_tree type, bool named ATTRIBUTE_UNUSED)
5922 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5925 /* Encode the current state of the #pragma [no_]long_calls. */
5926 typedef enum
5928 OFF, /* No #pragma [no_]long_calls is in effect. */
5929 LONG, /* #pragma long_calls is in effect. */
5930 SHORT /* #pragma no_long_calls is in effect. */
5931 } arm_pragma_enum;
5933 static arm_pragma_enum arm_pragma_long_calls = OFF;
5935 void
5936 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5938 arm_pragma_long_calls = LONG;
5941 void
5942 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5944 arm_pragma_long_calls = SHORT;
5947 void
5948 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5950 arm_pragma_long_calls = OFF;
5953 /* Handle an attribute requiring a FUNCTION_DECL;
5954 arguments as in struct attribute_spec.handler. */
5955 static tree
5956 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5957 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5959 if (TREE_CODE (*node) != FUNCTION_DECL)
5961 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5962 name);
5963 *no_add_attrs = true;
5966 return NULL_TREE;
5969 /* Handle an "interrupt" or "isr" attribute;
5970 arguments as in struct attribute_spec.handler. */
5971 static tree
5972 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5973 bool *no_add_attrs)
5975 if (DECL_P (*node))
5977 if (TREE_CODE (*node) != FUNCTION_DECL)
5979 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5980 name);
5981 *no_add_attrs = true;
5983 /* FIXME: the argument, if any, is checked for type attributes;
5984 should it be checked for decl ones? */
5986 else
5988 if (TREE_CODE (*node) == FUNCTION_TYPE
5989 || TREE_CODE (*node) == METHOD_TYPE)
5991 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5993 warning (OPT_Wattributes, "%qE attribute ignored",
5994 name);
5995 *no_add_attrs = true;
5998 else if (TREE_CODE (*node) == POINTER_TYPE
5999 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6000 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6001 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6003 *node = build_variant_type_copy (*node);
6004 TREE_TYPE (*node) = build_type_attribute_variant
6005 (TREE_TYPE (*node),
6006 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6007 *no_add_attrs = true;
6009 else
6011 /* Possibly pass this attribute on from the type to a decl. */
6012 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6013 | (int) ATTR_FLAG_FUNCTION_NEXT
6014 | (int) ATTR_FLAG_ARRAY_NEXT))
6016 *no_add_attrs = true;
6017 return tree_cons (name, args, NULL_TREE);
6019 else
6021 warning (OPT_Wattributes, "%qE attribute ignored",
6022 name);
6027 return NULL_TREE;
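/* Illustrative usage of the attributes handled above (assumed typical
   source-level usage, not part of this file); the string argument selects
   the interrupt type checked by arm_isr_value:

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fast_handler (void) __attribute__ ((isr ("FIQ")));
*/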
6030 /* Handle a "pcs" attribute; arguments as in struct
6031 attribute_spec.handler. */
6032 static tree
6033 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6034 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6036 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6038 warning (OPT_Wattributes, "%qE attribute ignored", name);
6039 *no_add_attrs = true;
6041 return NULL_TREE;
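/* Illustrative usage (assumed, not from the source): the "pcs" attribute
   selects the procedure call standard for one function's type, e.g.

     double dot (double a, double b) __attribute__ ((pcs ("aapcs-vfp")));

   Unrecognised variants come back from arm_pcs_from_attribute as
   ARM_PCS_UNKNOWN and are rejected with the warning above.  */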
6044 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6045 /* Handle the "notshared" attribute. This attribute is another way of
6046 requesting hidden visibility. ARM's compiler supports
6047 "__declspec(notshared)"; we support the same thing via an
6048 attribute. */
6050 static tree
6051 arm_handle_notshared_attribute (tree *node,
6052 tree name ATTRIBUTE_UNUSED,
6053 tree args ATTRIBUTE_UNUSED,
6054 int flags ATTRIBUTE_UNUSED,
6055 bool *no_add_attrs)
6057 tree decl = TYPE_NAME (*node);
6059 if (decl)
6061 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6062 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6063 *no_add_attrs = false;
6065 return NULL_TREE;
6067 #endif
6069 /* Return 0 if the attributes for two types are incompatible, 1 if they
6070 are compatible, and 2 if they are nearly compatible (which causes a
6071 warning to be generated). */
6072 static int
6073 arm_comp_type_attributes (const_tree type1, const_tree type2)
6075 int l1, l2, s1, s2;
6077 /* Check for mismatch of non-default calling convention. */
6078 if (TREE_CODE (type1) != FUNCTION_TYPE)
6079 return 1;
6081 /* Check for mismatched call attributes. */
6082 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6083 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6084 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6085 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6087 /* Only bother to check if an attribute is defined. */
6088 if (l1 | l2 | s1 | s2)
6090 /* If one type has an attribute, the other must have the same attribute. */
6091 if ((l1 != l2) || (s1 != s2))
6092 return 0;
6094 /* Disallow mixed attributes. */
6095 if ((l1 & s2) || (l2 & s1))
6096 return 0;
6099 /* Check for mismatched ISR attribute. */
6100 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6101 if (! l1)
6102 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6103 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6104 if (! l2)
6105 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6106 if (l1 != l2)
6107 return 0;
6109 return 1;
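/* A hedged example of what the check above rejects (illustrative only):
   given

     void near_fn (void);
     void far_fn (void) __attribute__ ((long_call));

   the two function types carry mismatched call attributes, so this hook
   returns 0 and the types are treated as incompatible.  */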
6112 /* Assign default attributes to a newly defined type. This is used to
6113 set short_call/long_call attributes for function types of
6114 functions defined inside corresponding #pragma scopes. */
6115 static void
6116 arm_set_default_type_attributes (tree type)
6118 /* Add __attribute__ ((long_call)) to all functions when inside
6119 #pragma long_calls, or __attribute__ ((short_call)) when inside
6120 #pragma no_long_calls. */
6121 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6123 tree type_attr_list, attr_name;
6124 type_attr_list = TYPE_ATTRIBUTES (type);
6126 if (arm_pragma_long_calls == LONG)
6127 attr_name = get_identifier ("long_call");
6128 else if (arm_pragma_long_calls == SHORT)
6129 attr_name = get_identifier ("short_call");
6130 else
6131 return;
6133 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6134 TYPE_ATTRIBUTES (type) = type_attr_list;
6138 /* Return true if DECL is known to be linked into section SECTION. */
6140 static bool
6141 arm_function_in_section_p (tree decl, section *section)
6143 /* We can only be certain about functions defined in the same
6144 compilation unit. */
6145 if (!TREE_STATIC (decl))
6146 return false;
6148 /* Make sure that SYMBOL always binds to the definition in this
6149 compilation unit. */
6150 if (!targetm.binds_local_p (decl))
6151 return false;
6153 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6154 if (!DECL_SECTION_NAME (decl))
6156 /* Make sure that we will not create a unique section for DECL. */
6157 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6158 return false;
6161 return function_section (decl) == section;
6164 /* Return nonzero if a 32-bit "long_call" should be generated for
6165 a call from the current function to DECL. We generate a long_call
6166 if the function:
6168 a. has an __attribute__ ((long_call))
6169 or b. is within the scope of a #pragma long_calls
6170 or c. the -mlong-calls command line switch has been specified
6172 However we do not generate a long call if the function:
6174 d. has an __attribute__ ((short_call))
6175 or e. is inside the scope of a #pragma no_long_calls
6176 or f. is defined in the same section as the current function. */
6178 bool
6179 arm_is_long_call_p (tree decl)
6181 tree attrs;
6183 if (!decl)
6184 return TARGET_LONG_CALLS;
6186 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6187 if (lookup_attribute ("short_call", attrs))
6188 return false;
6190 /* For "f", be conservative, and only cater for cases in which the
6191 whole of the current function is placed in the same section. */
6192 if (!flag_reorder_blocks_and_partition
6193 && TREE_CODE (decl) == FUNCTION_DECL
6194 && arm_function_in_section_p (decl, current_function_section ()))
6195 return false;
6197 if (lookup_attribute ("long_call", attrs))
6198 return true;
6200 return TARGET_LONG_CALLS;
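/* Illustrative source-level triggers for the rules above (assumed typical
   usage, not part of this file):

     void far_func (void) __attribute__ ((long_call));     rule a
     void near_func (void) __attribute__ ((short_call));   rule d

     #pragma long_calls
     void also_far (void);                                  rule b
     #pragma long_calls_off

   Rule c corresponds to compiling with -mlong-calls.  */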
6203 /* Return nonzero if it is ok to make a tail-call to DECL. */
6204 static bool
6205 arm_function_ok_for_sibcall (tree decl, tree exp)
6207 unsigned long func_type;
6209 if (cfun->machine->sibcall_blocked)
6210 return false;
6212 /* Never tailcall something if we are generating code for Thumb-1. */
6213 if (TARGET_THUMB1)
6214 return false;
6216 /* The PIC register is live on entry to VxWorks PLT entries, so we
6217 must make the call before restoring the PIC register. */
6218 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6219 return false;
6221 /* If we are interworking and the function is not declared static
6222 then we can't tail-call it unless we know that it exists in this
6223 compilation unit (since it might be a Thumb routine). */
6224 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6225 && !TREE_ASM_WRITTEN (decl))
6226 return false;
6228 func_type = arm_current_func_type ();
6229 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6230 if (IS_INTERRUPT (func_type))
6231 return false;
6233 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6235 /* Check that the return value locations are the same. For
6236 example that we aren't returning a value from the sibling in
6237 a VFP register but then need to transfer it to a core
6238 register. */
6239 rtx a, b;
6241 a = arm_function_value (TREE_TYPE (exp), decl, false);
6242 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6243 cfun->decl, false);
6244 if (!rtx_equal_p (a, b))
6245 return false;
6248 /* Never tailcall if function may be called with a misaligned SP. */
6249 if (IS_STACKALIGN (func_type))
6250 return false;
6252 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6253 references should become a NOP. Don't convert such calls into
6254 sibling calls. */
6255 if (TARGET_AAPCS_BASED
6256 && arm_abi == ARM_ABI_AAPCS
6257 && decl
6258 && DECL_WEAK (decl))
6259 return false;
6261 /* Everything else is ok. */
6262 return true;
6266 /* Addressing mode support functions. */
6268 /* Return nonzero if X is a legitimate immediate operand when compiling
6269 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6271 legitimate_pic_operand_p (rtx x)
6273 if (GET_CODE (x) == SYMBOL_REF
6274 || (GET_CODE (x) == CONST
6275 && GET_CODE (XEXP (x, 0)) == PLUS
6276 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6277 return 0;
6279 return 1;
6282 /* Record that the current function needs a PIC register. Initialize
6283 cfun->machine->pic_reg if we have not already done so. */
6285 static void
6286 require_pic_register (void)
6288 /* A lot of the logic here is made obscure by the fact that this
6289 routine gets called as part of the rtx cost estimation process.
6290 We don't want those calls to affect any assumptions about the real
6291 function; and further, we can't call entry_of_function() until we
6292 start the real expansion process. */
6293 if (!crtl->uses_pic_offset_table)
6295 gcc_assert (can_create_pseudo_p ());
6296 if (arm_pic_register != INVALID_REGNUM
6297 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6299 if (!cfun->machine->pic_reg)
6300 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6302 /* Play games to avoid marking the function as needing pic
6303 if we are being called as part of the cost-estimation
6304 process. */
6305 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6306 crtl->uses_pic_offset_table = 1;
6308 else
6310 rtx seq, insn;
6312 if (!cfun->machine->pic_reg)
6313 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6315 /* Play games to avoid marking the function as needing pic
6316 if we are being called as part of the cost-estimation
6317 process. */
6318 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6320 crtl->uses_pic_offset_table = 1;
6321 start_sequence ();
6323 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6324 && arm_pic_register > LAST_LO_REGNUM)
6325 emit_move_insn (cfun->machine->pic_reg,
6326 gen_rtx_REG (Pmode, arm_pic_register));
6327 else
6328 arm_load_pic_register (0UL);
6330 seq = get_insns ();
6331 end_sequence ();
6333 for (insn = seq; insn; insn = NEXT_INSN (insn))
6334 if (INSN_P (insn))
6335 INSN_LOCATION (insn) = prologue_location;
6337 /* We can be called during expansion of PHI nodes, where
6338 we can't yet emit instructions directly in the final
6339 insn stream. Queue the insns on the entry edge, they will
6340 be committed after everything else is expanded. */
6341 insert_insn_on_edge (seq,
6342 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6349 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6351 if (GET_CODE (orig) == SYMBOL_REF
6352 || GET_CODE (orig) == LABEL_REF)
6354 rtx insn;
6356 if (reg == 0)
6358 gcc_assert (can_create_pseudo_p ());
6359 reg = gen_reg_rtx (Pmode);
6362 /* VxWorks does not impose a fixed gap between segments; the run-time
6363 gap can be different from the object-file gap. We therefore can't
6364 use GOTOFF unless we are absolutely sure that the symbol is in the
6365 same segment as the GOT. Unfortunately, the flexibility of linker
6366 scripts means that we can't be sure of that in general, so assume
6367 that GOTOFF is never valid on VxWorks. */
6368 if ((GET_CODE (orig) == LABEL_REF
6369 || (GET_CODE (orig) == SYMBOL_REF &&
6370 SYMBOL_REF_LOCAL_P (orig)))
6371 && NEED_GOT_RELOC
6372 && arm_pic_data_is_text_relative)
6373 insn = arm_pic_static_addr (orig, reg);
6374 else
6376 rtx pat;
6377 rtx mem;
6379 /* If this function doesn't have a pic register, create one now. */
6380 require_pic_register ();
6382 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6384 /* Make the MEM as close to a constant as possible. */
6385 mem = SET_SRC (pat);
6386 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6387 MEM_READONLY_P (mem) = 1;
6388 MEM_NOTRAP_P (mem) = 1;
6390 insn = emit_insn (pat);
6393 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6394 by loop. */
6395 set_unique_reg_note (insn, REG_EQUAL, orig);
6397 return reg;
6399 else if (GET_CODE (orig) == CONST)
6401 rtx base, offset;
6403 if (GET_CODE (XEXP (orig, 0)) == PLUS
6404 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6405 return orig;
6407 /* Handle the case where we have: const (UNSPEC_TLS). */
6408 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6409 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6410 return orig;
6412 /* Handle the case where we have:
6413 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6414 CONST_INT. */
6415 if (GET_CODE (XEXP (orig, 0)) == PLUS
6416 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6417 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6419 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6420 return orig;
6423 if (reg == 0)
6425 gcc_assert (can_create_pseudo_p ());
6426 reg = gen_reg_rtx (Pmode);
6429 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6431 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6432 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6433 base == reg ? 0 : reg);
6435 if (CONST_INT_P (offset))
6437 /* The base register doesn't really matter, we only want to
6438 test the index for the appropriate mode. */
6439 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6441 gcc_assert (can_create_pseudo_p ());
6442 offset = force_reg (Pmode, offset);
6445 if (CONST_INT_P (offset))
6446 return plus_constant (Pmode, base, INTVAL (offset));
6449 if (GET_MODE_SIZE (mode) > 4
6450 && (GET_MODE_CLASS (mode) == MODE_INT
6451 || TARGET_SOFT_FLOAT))
6453 emit_insn (gen_addsi3 (reg, base, offset));
6454 return reg;
6457 return gen_rtx_PLUS (Pmode, base, offset);
6460 return orig;
6464 /* Find a spare register to use during the prolog of a function. */
6466 static int
6467 thumb_find_work_register (unsigned long pushed_regs_mask)
6469 int reg;
6471 /* Check the argument registers first as these are call-used. The
6472 register allocation order means that sometimes r3 might be used
6473 but earlier argument registers might not, so check them all. */
6474 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6475 if (!df_regs_ever_live_p (reg))
6476 return reg;
6478 /* Before going on to check the call-saved registers we can try a couple
6479 more ways of deducing that r3 is available. The first is when we are
6480 pushing anonymous arguments onto the stack and we have less than 4
6481 registers worth of fixed arguments(*). In this case r3 will be part of
6482 the variable argument list and so we can be sure that it will be
6483 pushed right at the start of the function. Hence it will be available
6484 for the rest of the prologue.
6485 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6486 if (cfun->machine->uses_anonymous_args
6487 && crtl->args.pretend_args_size > 0)
6488 return LAST_ARG_REGNUM;
6490 /* The other case is when we have fixed arguments but less than 4 registers
6491 worth. In this case r3 might be used in the body of the function, but
6492 it is not being used to convey an argument into the function. In theory
6493 we could just check crtl->args.size to see how many bytes are
6494 being passed in argument registers, but it seems that it is unreliable.
6495 Sometimes it will have the value 0 when in fact arguments are being
6496 passed. (See testcase execute/20021111-1.c for an example). So we also
6497 check the args_info.nregs field as well. The problem with this field is
6498 that it makes no allowances for arguments that are passed to the
6499 function but which are not used. Hence we could miss an opportunity
6500 when a function has an unused argument in r3. But it is better to be
6501 safe than to be sorry. */
6502 if (! cfun->machine->uses_anonymous_args
6503 && crtl->args.size >= 0
6504 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6505 && (TARGET_AAPCS_BASED
6506 ? crtl->args.info.aapcs_ncrn < 4
6507 : crtl->args.info.nregs < 4))
6508 return LAST_ARG_REGNUM;
6510 /* Otherwise look for a call-saved register that is going to be pushed. */
6511 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6512 if (pushed_regs_mask & (1 << reg))
6513 return reg;
6515 if (TARGET_THUMB2)
6517 /* Thumb-2 can use high regs. */
6518 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6519 if (pushed_regs_mask & (1 << reg))
6520 return reg;
6522 /* Something went wrong - thumb_compute_save_reg_mask()
6523 should have arranged for a suitable register to be pushed. */
6524 gcc_unreachable ();
6527 static GTY(()) int pic_labelno;
6529 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6530 low register. */
6532 void
6533 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6535 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6537 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6538 return;
6540 gcc_assert (flag_pic);
6542 pic_reg = cfun->machine->pic_reg;
6543 if (TARGET_VXWORKS_RTP)
6545 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6546 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6547 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6549 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6551 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6552 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6554 else
6556 /* We use an UNSPEC rather than a LABEL_REF because this label
6557 never appears in the code stream. */
6559 labelno = GEN_INT (pic_labelno++);
6560 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6561 l1 = gen_rtx_CONST (VOIDmode, l1);
6563 /* On the ARM the PC register contains 'dot + 8' at the time of the
6564 addition, on the Thumb it is 'dot + 4'. */
6565 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6566 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6567 UNSPEC_GOTSYM_OFF);
6568 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6570 if (TARGET_32BIT)
6572 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6574 else /* TARGET_THUMB1 */
6576 if (arm_pic_register != INVALID_REGNUM
6577 && REGNO (pic_reg) > LAST_LO_REGNUM)
6579 /* We will have pushed the pic register, so we should always be
6580 able to find a work register. */
6581 pic_tmp = gen_rtx_REG (SImode,
6582 thumb_find_work_register (saved_regs));
6583 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6584 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6585 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6587 else if (arm_pic_register != INVALID_REGNUM
6588 && arm_pic_register > LAST_LO_REGNUM
6589 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6591 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6592 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6593 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6595 else
6596 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6600 /* Need to emit this whether or not we obey regdecls,
6601 since setjmp/longjmp can cause life info to screw up. */
6602 emit_use (pic_reg);
6605 /* Generate code to load the address of a static var when flag_pic is set. */
6606 static rtx
6607 arm_pic_static_addr (rtx orig, rtx reg)
6609 rtx l1, labelno, offset_rtx, insn;
6611 gcc_assert (flag_pic);
6613 /* We use an UNSPEC rather than a LABEL_REF because this label
6614 never appears in the code stream. */
6615 labelno = GEN_INT (pic_labelno++);
6616 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6617 l1 = gen_rtx_CONST (VOIDmode, l1);
6619 /* On the ARM the PC register contains 'dot + 8' at the time of the
6620 addition, on the Thumb it is 'dot + 4'. */
6621 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6622 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6623 UNSPEC_SYMBOL_OFFSET);
6624 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6626 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6627 return insn;
6630 /* Return nonzero if X is valid as an ARM state addressing register. */
6631 static int
6632 arm_address_register_rtx_p (rtx x, int strict_p)
6634 int regno;
6636 if (!REG_P (x))
6637 return 0;
6639 regno = REGNO (x);
6641 if (strict_p)
6642 return ARM_REGNO_OK_FOR_BASE_P (regno);
6644 return (regno <= LAST_ARM_REGNUM
6645 || regno >= FIRST_PSEUDO_REGISTER
6646 || regno == FRAME_POINTER_REGNUM
6647 || regno == ARG_POINTER_REGNUM);
6650 /* Return TRUE if this rtx is the difference of a symbol and a label,
6651 and will reduce to a PC-relative relocation in the object file.
6652 Expressions like this can be left alone when generating PIC, rather
6653 than forced through the GOT. */
6654 static int
6655 pcrel_constant_p (rtx x)
6657 if (GET_CODE (x) == MINUS)
6658 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6660 return FALSE;
6663 /* Return true if X will surely end up in an index register after next
6664 splitting pass. */
6665 static bool
6666 will_be_in_index_register (const_rtx x)
6668 /* arm.md: calculate_pic_address will split this into a register. */
6669 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6672 /* Return nonzero if X is a valid ARM state address operand. */
6674 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6675 int strict_p)
6677 bool use_ldrd;
6678 enum rtx_code code = GET_CODE (x);
6680 if (arm_address_register_rtx_p (x, strict_p))
6681 return 1;
6683 use_ldrd = (TARGET_LDRD
6684 && (mode == DImode
6685 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6687 if (code == POST_INC || code == PRE_DEC
6688 || ((code == PRE_INC || code == POST_DEC)
6689 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6690 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6692 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6693 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6694 && GET_CODE (XEXP (x, 1)) == PLUS
6695 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6697 rtx addend = XEXP (XEXP (x, 1), 1);
6699 /* Don't allow ldrd post increment by register because it's hard
6700 to fixup invalid register choices. */
6701 if (use_ldrd
6702 && GET_CODE (x) == POST_MODIFY
6703 && REG_P (addend))
6704 return 0;
6706 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6707 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6710 /* After reload constants split into minipools will have addresses
6711 from a LABEL_REF. */
6712 else if (reload_completed
6713 && (code == LABEL_REF
6714 || (code == CONST
6715 && GET_CODE (XEXP (x, 0)) == PLUS
6716 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6717 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6718 return 1;
6720 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6721 return 0;
6723 else if (code == PLUS)
6725 rtx xop0 = XEXP (x, 0);
6726 rtx xop1 = XEXP (x, 1);
6728 return ((arm_address_register_rtx_p (xop0, strict_p)
6729 && ((CONST_INT_P (xop1)
6730 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6731 || (!strict_p && will_be_in_index_register (xop1))))
6732 || (arm_address_register_rtx_p (xop1, strict_p)
6733 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6736 #if 0
6737 /* Reload currently can't handle MINUS, so disable this for now */
6738 else if (GET_CODE (x) == MINUS)
6740 rtx xop0 = XEXP (x, 0);
6741 rtx xop1 = XEXP (x, 1);
6743 return (arm_address_register_rtx_p (xop0, strict_p)
6744 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6746 #endif
6748 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6749 && code == SYMBOL_REF
6750 && CONSTANT_POOL_ADDRESS_P (x)
6751 && ! (flag_pic
6752 && symbol_mentioned_p (get_pool_constant (x))
6753 && ! pcrel_constant_p (get_pool_constant (x))))
6754 return 1;
6756 return 0;
6759 /* Return nonzero if X is a valid Thumb-2 address operand. */
6760 static int
6761 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6763 bool use_ldrd;
6764 enum rtx_code code = GET_CODE (x);
6766 if (arm_address_register_rtx_p (x, strict_p))
6767 return 1;
6769 use_ldrd = (TARGET_LDRD
6770 && (mode == DImode
6771 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6773 if (code == POST_INC || code == PRE_DEC
6774 || ((code == PRE_INC || code == POST_DEC)
6775 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6776 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6778 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6779 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6780 && GET_CODE (XEXP (x, 1)) == PLUS
6781 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6783 /* Thumb-2 only has autoincrement by constant. */
6784 rtx addend = XEXP (XEXP (x, 1), 1);
6785 HOST_WIDE_INT offset;
6787 if (!CONST_INT_P (addend))
6788 return 0;
6790 offset = INTVAL(addend);
6791 if (GET_MODE_SIZE (mode) <= 4)
6792 return (offset > -256 && offset < 256);
6794 return (use_ldrd && offset > -1024 && offset < 1024
6795 && (offset & 3) == 0);
6798 /* After reload constants split into minipools will have addresses
6799 from a LABEL_REF. */
6800 else if (reload_completed
6801 && (code == LABEL_REF
6802 || (code == CONST
6803 && GET_CODE (XEXP (x, 0)) == PLUS
6804 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6805 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6806 return 1;
6808 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6809 return 0;
6811 else if (code == PLUS)
6813 rtx xop0 = XEXP (x, 0);
6814 rtx xop1 = XEXP (x, 1);
6816 return ((arm_address_register_rtx_p (xop0, strict_p)
6817 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6818 || (!strict_p && will_be_in_index_register (xop1))))
6819 || (arm_address_register_rtx_p (xop1, strict_p)
6820 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6823 /* Normally we can assign constant values to target registers without
6824 the help of the constant pool. But there are cases where we have to
6825 use the constant pool, such as:
6826 1) assigning a label to a register.
6827 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6829 A constant pool access of the form:
6830 (set (reg r0) (mem (symbol_ref (".LC0"))))
6831 will cause a literal pool to be used (later, in arm_reorg).
6832 So here we mark such a form as invalid; the compiler will then
6833 adjust it into:
6834 (set (reg r0) (symbol_ref (".LC0")))
6835 (set (reg r0) (mem (reg r0))).
6836 No extra register is required, and (mem (reg r0)) won't cause the use
6837 of literal pools. */
6838 else if (arm_disable_literal_pool && code == SYMBOL_REF
6839 && CONSTANT_POOL_ADDRESS_P (x))
6840 return 0;
6842 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6843 && code == SYMBOL_REF
6844 && CONSTANT_POOL_ADDRESS_P (x)
6845 && ! (flag_pic
6846 && symbol_mentioned_p (get_pool_constant (x))
6847 && ! pcrel_constant_p (get_pool_constant (x))))
6848 return 1;
6850 return 0;
6853 /* Return nonzero if INDEX is valid for an address index operand in
6854 ARM state. */
6855 static int
6856 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6857 int strict_p)
6859 HOST_WIDE_INT range;
6860 enum rtx_code code = GET_CODE (index);
6862 /* Standard coprocessor addressing modes. */
6863 if (TARGET_HARD_FLOAT
6864 && TARGET_VFP
6865 && (mode == SFmode || mode == DFmode))
6866 return (code == CONST_INT && INTVAL (index) < 1024
6867 && INTVAL (index) > -1024
6868 && (INTVAL (index) & 3) == 0);
6870 /* For quad modes, we restrict the constant offset to be slightly less
6871 than what the instruction format permits. We do this because for
6872 quad mode moves, we will actually decompose them into two separate
6873 double-mode reads or writes. INDEX must therefore be a valid
6874 (double-mode) offset and so should INDEX+8. */
6875 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6876 return (code == CONST_INT
6877 && INTVAL (index) < 1016
6878 && INTVAL (index) > -1024
6879 && (INTVAL (index) & 3) == 0);
6881 /* We have no such constraint on double mode offsets, so we permit the
6882 full range of the instruction format. */
6883 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6884 return (code == CONST_INT
6885 && INTVAL (index) < 1024
6886 && INTVAL (index) > -1024
6887 && (INTVAL (index) & 3) == 0);
6889 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6890 return (code == CONST_INT
6891 && INTVAL (index) < 1024
6892 && INTVAL (index) > -1024
6893 && (INTVAL (index) & 3) == 0);
6895 if (arm_address_register_rtx_p (index, strict_p)
6896 && (GET_MODE_SIZE (mode) <= 4))
6897 return 1;
6899 if (mode == DImode || mode == DFmode)
6901 if (code == CONST_INT)
6903 HOST_WIDE_INT val = INTVAL (index);
6905 if (TARGET_LDRD)
6906 return val > -256 && val < 256;
6907 else
6908 return val > -4096 && val < 4092;
6911 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6914 if (GET_MODE_SIZE (mode) <= 4
6915 && ! (arm_arch4
6916 && (mode == HImode
6917 || mode == HFmode
6918 || (mode == QImode && outer == SIGN_EXTEND))))
6920 if (code == MULT)
6922 rtx xiop0 = XEXP (index, 0);
6923 rtx xiop1 = XEXP (index, 1);
6925 return ((arm_address_register_rtx_p (xiop0, strict_p)
6926 && power_of_two_operand (xiop1, SImode))
6927 || (arm_address_register_rtx_p (xiop1, strict_p)
6928 && power_of_two_operand (xiop0, SImode)));
6930 else if (code == LSHIFTRT || code == ASHIFTRT
6931 || code == ASHIFT || code == ROTATERT)
6933 rtx op = XEXP (index, 1);
6935 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6936 && CONST_INT_P (op)
6937 && INTVAL (op) > 0
6938 && INTVAL (op) <= 31);
6942 /* For ARM v4 we may be doing a sign-extend operation during the
6943 load. */
6944 if (arm_arch4)
6946 if (mode == HImode
6947 || mode == HFmode
6948 || (outer == SIGN_EXTEND && mode == QImode))
6949 range = 256;
6950 else
6951 range = 4096;
6953 else
6954 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6956 return (code == CONST_INT
6957 && INTVAL (index) < range
6958 && INTVAL (index) > -range);
6961 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6962 index operand, i.e. 1, 2, 4 or 8. */
6963 static bool
6964 thumb2_index_mul_operand (rtx op)
6966 HOST_WIDE_INT val;
6968 if (!CONST_INT_P (op))
6969 return false;
6971 val = INTVAL(op);
6972 return (val == 1 || val == 2 || val == 4 || val == 8);
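/* For illustration (assumed encoding, not from the source): the accepted
   factors correspond to the LSL #0..#3 shifts available in Thumb-2
   register-offset addressing, e.g. a scale of 4 yields

     ldr r0, [r1, r2, lsl #2]
*/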
6975 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6976 static int
6977 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6979 enum rtx_code code = GET_CODE (index);
6981 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6982 /* Standard coprocessor addressing modes. */
6983 if (TARGET_HARD_FLOAT
6984 && TARGET_VFP
6985 && (mode == SFmode || mode == DFmode))
6986 return (code == CONST_INT && INTVAL (index) < 1024
6987 /* Thumb-2 allows only > -256 index range for its core register
6988 loads/stores. Since we allow SF/DF in core registers, we have
6989 to use the intersection between -256~4096 (core) and -1024~1024
6990 (coprocessor). */
6991 && INTVAL (index) > -256
6992 && (INTVAL (index) & 3) == 0);
6994 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6996 /* For DImode assume values will usually live in core regs
6997 and only allow LDRD addressing modes. */
6998 if (!TARGET_LDRD || mode != DImode)
6999 return (code == CONST_INT
7000 && INTVAL (index) < 1024
7001 && INTVAL (index) > -1024
7002 && (INTVAL (index) & 3) == 0);
7005 /* For quad modes, we restrict the constant offset to be slightly less
7006 than what the instruction format permits. We do this because for
7007 quad mode moves, we will actually decompose them into two separate
7008 double-mode reads or writes. INDEX must therefore be a valid
7009 (double-mode) offset and so should INDEX+8. */
7010 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7011 return (code == CONST_INT
7012 && INTVAL (index) < 1016
7013 && INTVAL (index) > -1024
7014 && (INTVAL (index) & 3) == 0);
7016 /* We have no such constraint on double mode offsets, so we permit the
7017 full range of the instruction format. */
7018 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7019 return (code == CONST_INT
7020 && INTVAL (index) < 1024
7021 && INTVAL (index) > -1024
7022 && (INTVAL (index) & 3) == 0);
7024 if (arm_address_register_rtx_p (index, strict_p)
7025 && (GET_MODE_SIZE (mode) <= 4))
7026 return 1;
7028 if (mode == DImode || mode == DFmode)
7030 if (code == CONST_INT)
7032 HOST_WIDE_INT val = INTVAL (index);
7033 /* ??? Can we assume ldrd for thumb2? */
7034 /* Thumb-2 ldrd only has reg+const addressing modes. */
7035 /* ldrd supports offsets of +-1020.
7036 However the ldr fallback does not. */
7037 return val > -256 && val < 256 && (val & 3) == 0;
7039 else
7040 return 0;
7043 if (code == MULT)
7045 rtx xiop0 = XEXP (index, 0);
7046 rtx xiop1 = XEXP (index, 1);
7048 return ((arm_address_register_rtx_p (xiop0, strict_p)
7049 && thumb2_index_mul_operand (xiop1))
7050 || (arm_address_register_rtx_p (xiop1, strict_p)
7051 && thumb2_index_mul_operand (xiop0)));
7053 else if (code == ASHIFT)
7055 rtx op = XEXP (index, 1);
7057 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7058 && CONST_INT_P (op)
7059 && INTVAL (op) > 0
7060 && INTVAL (op) <= 3);
7063 return (code == CONST_INT
7064 && INTVAL (index) < 4096
7065 && INTVAL (index) > -256);
7068 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7069 static int
7070 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
7072 int regno;
7074 if (!REG_P (x))
7075 return 0;
7077 regno = REGNO (x);
7079 if (strict_p)
7080 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7082 return (regno <= LAST_LO_REGNUM
7083 || regno > LAST_VIRTUAL_REGISTER
7084 || regno == FRAME_POINTER_REGNUM
7085 || (GET_MODE_SIZE (mode) >= 4
7086 && (regno == STACK_POINTER_REGNUM
7087 || regno >= FIRST_PSEUDO_REGISTER
7088 || x == hard_frame_pointer_rtx
7089 || x == arg_pointer_rtx)));
7092 /* Return nonzero if x is a legitimate index register. This is the case
7093 for any base register that can access a QImode object. */
7094 inline static int
7095 thumb1_index_register_rtx_p (rtx x, int strict_p)
7097 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7100 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7102 The AP may be eliminated to either the SP or the FP, so we use the
7103 least common denominator, e.g. SImode, and offsets from 0 to 64.
7105 ??? Verify whether the above is the right approach.
7107 ??? Also, the FP may be eliminated to the SP, so perhaps that
7108 needs special handling also.
7110 ??? Look at how the mips16 port solves this problem. It probably uses
7111 better ways to solve some of these problems.
7113 Although it is not incorrect, we don't accept QImode and HImode
7114 addresses based on the frame pointer or arg pointer until the
7115 reload pass starts. This is so that eliminating such addresses
7116 into stack based ones won't produce impossible code. */
7118 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
7120 /* ??? Not clear if this is right. Experiment. */
7121 if (GET_MODE_SIZE (mode) < 4
7122 && !(reload_in_progress || reload_completed)
7123 && (reg_mentioned_p (frame_pointer_rtx, x)
7124 || reg_mentioned_p (arg_pointer_rtx, x)
7125 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7126 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7127 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7128 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7129 return 0;
7131 /* Accept any base register. SP only in SImode or larger. */
7132 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7133 return 1;
7135 /* This is PC relative data before arm_reorg runs. */
7136 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7137 && GET_CODE (x) == SYMBOL_REF
7138 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7139 return 1;
7141 /* This is PC relative data after arm_reorg runs. */
7142 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7143 && reload_completed
7144 && (GET_CODE (x) == LABEL_REF
7145 || (GET_CODE (x) == CONST
7146 && GET_CODE (XEXP (x, 0)) == PLUS
7147 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7148 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7149 return 1;
7151 /* Post-inc indexing only supported for SImode and larger. */
7152 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7153 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7154 return 1;
7156 else if (GET_CODE (x) == PLUS)
7158 /* REG+REG address can be any two index registers. */
7159 /* We disallow FRAME+REG addressing since we know that FRAME
7160 will be replaced with STACK, and SP relative addressing only
7161 permits SP+OFFSET. */
7162 if (GET_MODE_SIZE (mode) <= 4
7163 && XEXP (x, 0) != frame_pointer_rtx
7164 && XEXP (x, 1) != frame_pointer_rtx
7165 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7166 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7167 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7168 return 1;
7170 /* REG+const has 5-7 bit offset for non-SP registers. */
7171 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7172 || XEXP (x, 0) == arg_pointer_rtx)
7173 && CONST_INT_P (XEXP (x, 1))
7174 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7175 return 1;
7177 /* REG+const has 10-bit offset for SP, but only SImode and
7178 larger is supported. */
7179 /* ??? Should probably check for DI/DFmode overflow here
7180 just like GO_IF_LEGITIMATE_OFFSET does. */
7181 else if (REG_P (XEXP (x, 0))
7182 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7183 && GET_MODE_SIZE (mode) >= 4
7184 && CONST_INT_P (XEXP (x, 1))
7185 && INTVAL (XEXP (x, 1)) >= 0
7186 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7187 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7188 return 1;
7190 else if (REG_P (XEXP (x, 0))
7191 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7192 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7193 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7194 && REGNO (XEXP (x, 0))
7195 <= LAST_VIRTUAL_POINTER_REGISTER))
7196 && GET_MODE_SIZE (mode) >= 4
7197 && CONST_INT_P (XEXP (x, 1))
7198 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7199 return 1;
7202 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7203 && GET_MODE_SIZE (mode) == 4
7204 && GET_CODE (x) == SYMBOL_REF
7205 && CONSTANT_POOL_ADDRESS_P (x)
7206 && ! (flag_pic
7207 && symbol_mentioned_p (get_pool_constant (x))
7208 && ! pcrel_constant_p (get_pool_constant (x))))
7209 return 1;
7211 return 0;
7214 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7215 instruction of mode MODE. */
7217 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7219 switch (GET_MODE_SIZE (mode))
7221 case 1:
7222 return val >= 0 && val < 32;
7224 case 2:
7225 return val >= 0 && val < 64 && (val & 1) == 0;
7227 default:
7228 return (val >= 0
7229 && (val + GET_MODE_SIZE (mode)) <= 128
7230 && (val & 3) == 0);
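/* A worked example of the ranges above (assuming the usual Thumb-1
   load/store encodings, which use a 5-bit immediate scaled by the access
   size): byte accesses reach offsets 0..31, halfword accesses 0..62 in
   steps of 2, and word accesses 0..124 in steps of 4 (larger modes must
   also fit within the 128-byte window).  */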
7234 bool
7235 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7237 if (TARGET_ARM)
7238 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7239 else if (TARGET_THUMB2)
7240 return thumb2_legitimate_address_p (mode, x, strict_p);
7241 else /* if (TARGET_THUMB1) */
7242 return thumb1_legitimate_address_p (mode, x, strict_p);
7245 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7247 Given an rtx X being reloaded into a reg required to be
7248 in class CLASS, return the class of reg to actually use.
7249 In general this is just CLASS, but for the Thumb core registers and
7250 immediate constants we prefer a LO_REGS class or a subset. */
7252 static reg_class_t
7253 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7255 if (TARGET_32BIT)
7256 return rclass;
7257 else
7259 if (rclass == GENERAL_REGS)
7260 return LO_REGS;
7261 else
7262 return rclass;
7266 /* Build the SYMBOL_REF for __tls_get_addr. */
7268 static GTY(()) rtx tls_get_addr_libfunc;
7270 static rtx
7271 get_tls_get_addr (void)
7273 if (!tls_get_addr_libfunc)
7274 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7275 return tls_get_addr_libfunc;
7279 arm_load_tp (rtx target)
7281 if (!target)
7282 target = gen_reg_rtx (SImode);
7284 if (TARGET_HARD_TP)
7286 /* Can return in any reg. */
7287 emit_insn (gen_load_tp_hard (target));
7289 else
7291 /* Always returned in r0. Immediately copy the result into a pseudo,
7292 otherwise other uses of r0 (e.g. setting up function arguments) may
7293 clobber the value. */
7295 rtx tmp;
7297 emit_insn (gen_load_tp_soft ());
7299 tmp = gen_rtx_REG (SImode, 0);
7300 emit_move_insn (target, tmp);
7302 return target;
7305 static rtx
7306 load_tls_operand (rtx x, rtx reg)
7308 rtx tmp;
7310 if (reg == NULL_RTX)
7311 reg = gen_reg_rtx (SImode);
7313 tmp = gen_rtx_CONST (SImode, x);
7315 emit_move_insn (reg, tmp);
7317 return reg;
7320 static rtx
7321 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7323 rtx insns, label, labelno, sum;
7325 gcc_assert (reloc != TLS_DESCSEQ);
7326 start_sequence ();
7328 labelno = GEN_INT (pic_labelno++);
7329 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7330 label = gen_rtx_CONST (VOIDmode, label);
7332 sum = gen_rtx_UNSPEC (Pmode,
7333 gen_rtvec (4, x, GEN_INT (reloc), label,
7334 GEN_INT (TARGET_ARM ? 8 : 4)),
7335 UNSPEC_TLS);
7336 reg = load_tls_operand (sum, reg);
7338 if (TARGET_ARM)
7339 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7340 else
7341 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7343 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7344 LCT_PURE, /* LCT_CONST? */
7345 Pmode, 1, reg, Pmode);
7347 insns = get_insns ();
7348 end_sequence ();
7350 return insns;
7353 static rtx
7354 arm_tls_descseq_addr (rtx x, rtx reg)
7356 rtx labelno = GEN_INT (pic_labelno++);
7357 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7358 rtx sum = gen_rtx_UNSPEC (Pmode,
7359 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7360 gen_rtx_CONST (VOIDmode, label),
7361 GEN_INT (!TARGET_ARM)),
7362 UNSPEC_TLS);
7363 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7365 emit_insn (gen_tlscall (x, labelno));
7366 if (!reg)
7367 reg = gen_reg_rtx (SImode);
7368 else
7369 gcc_assert (REGNO (reg) != 0);
7371 emit_move_insn (reg, reg0);
7373 return reg;
7377 legitimize_tls_address (rtx x, rtx reg)
7379 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7380 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7382 switch (model)
7384 case TLS_MODEL_GLOBAL_DYNAMIC:
7385 if (TARGET_GNU2_TLS)
7387 reg = arm_tls_descseq_addr (x, reg);
7389 tp = arm_load_tp (NULL_RTX);
7391 dest = gen_rtx_PLUS (Pmode, tp, reg);
7393 else
7395 /* Original scheme */
7396 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7397 dest = gen_reg_rtx (Pmode);
7398 emit_libcall_block (insns, dest, ret, x);
7400 return dest;
7402 case TLS_MODEL_LOCAL_DYNAMIC:
7403 if (TARGET_GNU2_TLS)
7405 reg = arm_tls_descseq_addr (x, reg);
7407 tp = arm_load_tp (NULL_RTX);
7409 dest = gen_rtx_PLUS (Pmode, tp, reg);
7411 else
7413 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7415 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7416 share the LDM result with other LD model accesses. */
7417 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7418 UNSPEC_TLS);
7419 dest = gen_reg_rtx (Pmode);
7420 emit_libcall_block (insns, dest, ret, eqv);
7422 /* Load the addend. */
7423 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7424 GEN_INT (TLS_LDO32)),
7425 UNSPEC_TLS);
7426 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7427 dest = gen_rtx_PLUS (Pmode, dest, addend);
7429 return dest;
7431 case TLS_MODEL_INITIAL_EXEC:
7432 labelno = GEN_INT (pic_labelno++);
7433 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7434 label = gen_rtx_CONST (VOIDmode, label);
7435 sum = gen_rtx_UNSPEC (Pmode,
7436 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7437 GEN_INT (TARGET_ARM ? 8 : 4)),
7438 UNSPEC_TLS);
7439 reg = load_tls_operand (sum, reg);
7441 if (TARGET_ARM)
7442 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7443 else if (TARGET_THUMB2)
7444 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7445 else
7447 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7448 emit_move_insn (reg, gen_const_mem (SImode, reg));
7451 tp = arm_load_tp (NULL_RTX);
7453 return gen_rtx_PLUS (Pmode, tp, reg);
7455 case TLS_MODEL_LOCAL_EXEC:
7456 tp = arm_load_tp (NULL_RTX);
7458 reg = gen_rtx_UNSPEC (Pmode,
7459 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7460 UNSPEC_TLS);
7461 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7463 return gen_rtx_PLUS (Pmode, tp, reg);
7465 default:
7466 abort ();
7470 /* Try machine-dependent ways of modifying an illegitimate address
7471 to be legitimate. If we find one, return the new, valid address. */
7473 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7475 if (arm_tls_referenced_p (x))
7477 rtx addend = NULL;
7479 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7481 addend = XEXP (XEXP (x, 0), 1);
7482 x = XEXP (XEXP (x, 0), 0);
7485 if (GET_CODE (x) != SYMBOL_REF)
7486 return x;
7488 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7490 x = legitimize_tls_address (x, NULL_RTX);
7492 if (addend)
7494 x = gen_rtx_PLUS (SImode, x, addend);
7495 orig_x = x;
7497 else
7498 return x;
7501 if (!TARGET_ARM)
7503 /* TODO: legitimize_address for Thumb2. */
7504 if (TARGET_THUMB2)
7505 return x;
7506 return thumb_legitimize_address (x, orig_x, mode);
7509 if (GET_CODE (x) == PLUS)
7511 rtx xop0 = XEXP (x, 0);
7512 rtx xop1 = XEXP (x, 1);
7514 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7515 xop0 = force_reg (SImode, xop0);
7517 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7518 && !symbol_mentioned_p (xop1))
7519 xop1 = force_reg (SImode, xop1);
7521 if (ARM_BASE_REGISTER_RTX_P (xop0)
7522 && CONST_INT_P (xop1))
7524 HOST_WIDE_INT n, low_n;
7525 rtx base_reg, val;
7526 n = INTVAL (xop1);
7528 /* VFP addressing modes actually allow greater offsets, but for
7529 now we just stick with the lowest common denominator. */
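/* For example, an offset of 252 is split as (base + 256) - 4: the high
   part is a simple immediate add and the residue stays within the
   doubleword load/store range. */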
7530 if (mode == DImode
7531 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7533 low_n = n & 0x0f;
7534 n &= ~0x0f;
7535 if (low_n > 4)
7537 n += 16;
7538 low_n -= 16;
7541 else
7543 low_n = ((mode) == TImode ? 0
7544 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7545 n -= low_n;
7548 base_reg = gen_reg_rtx (SImode);
7549 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7550 emit_move_insn (base_reg, val);
7551 x = plus_constant (Pmode, base_reg, low_n);
7553 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7554 x = gen_rtx_PLUS (SImode, xop0, xop1);
7557 /* XXX We don't allow MINUS any more -- see comment in
7558 arm_legitimate_address_outer_p (). */
7559 else if (GET_CODE (x) == MINUS)
7561 rtx xop0 = XEXP (x, 0);
7562 rtx xop1 = XEXP (x, 1);
7564 if (CONSTANT_P (xop0))
7565 xop0 = force_reg (SImode, xop0);
7567 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7568 xop1 = force_reg (SImode, xop1);
7570 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7571 x = gen_rtx_MINUS (SImode, xop0, xop1);
7574 /* Make sure to take full advantage of the pre-indexed addressing mode
7575 with absolute addresses, which often allows the base register to be
7576 factorized for multiple adjacent memory references, and might even
7577 allow the minipool to be avoided entirely. */
7578 else if (CONST_INT_P (x) && optimize > 0)
7580 unsigned int bits;
7581 HOST_WIDE_INT mask, base, index;
7582 rtx base_reg;
7584 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
7585 use an 8-bit index. So let's use a 12-bit index for SImode only and
7586 hope that arm_gen_constant will enable ldrb to use more bits. */
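/* For example, the SImode absolute address 0xFFFFF004 first splits into
   base 0xFFFFF000 and index 4; since that base has more than 10 bits set,
   it is cheaper to use base 0xFFFFFFFF (a single mvn) with index -4091. */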
7587 bits = (mode == SImode) ? 12 : 8;
7588 mask = (1 << bits) - 1;
7589 base = INTVAL (x) & ~mask;
7590 index = INTVAL (x) & mask;
7591 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7593 /* It'll most probably be more efficient to generate the base
7594 with more bits set and use a negative index instead. */
7595 base |= mask;
7596 index -= mask;
7598 base_reg = force_reg (SImode, GEN_INT (base));
7599 x = plus_constant (Pmode, base_reg, index);
7602 if (flag_pic)
7604 /* We need to find and carefully transform any SYMBOL and LABEL
7605 references; so go back to the original address expression. */
7606 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7608 if (new_x != orig_x)
7609 x = new_x;
7612 return x;
7616 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7617 to be legitimate. If we find one, return the new, valid address. */
7619 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7621 if (GET_CODE (x) == PLUS
7622 && CONST_INT_P (XEXP (x, 1))
7623 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7624 || INTVAL (XEXP (x, 1)) < 0))
7626 rtx xop0 = XEXP (x, 0);
7627 rtx xop1 = XEXP (x, 1);
7628 HOST_WIDE_INT offset = INTVAL (xop1);
7630 /* Try and fold the offset into a biasing of the base register and
7631 then offsetting that. Don't do this when optimizing for space
7632 since it can cause too many CSEs. */
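/* For example, with optimize_size set, an SImode access at base + 300 is
   rewritten as (base + 252) + 48, where the #48 residue fits the Thumb-1
   scaled 5-bit load/store offset field. */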
7633 if (optimize_size && offset >= 0
7634 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7636 HOST_WIDE_INT delta;
7638 if (offset >= 256)
7639 delta = offset - (256 - GET_MODE_SIZE (mode));
7640 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7641 delta = 31 * GET_MODE_SIZE (mode);
7642 else
7643 delta = offset & (~31 * GET_MODE_SIZE (mode));
7645 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7646 NULL_RTX);
7647 x = plus_constant (Pmode, xop0, delta);
7649 else if (offset < 0 && offset > -256)
7650 /* Small negative offsets are best done with a subtract before the
7651 dereference; forcing these into a register normally takes two
7652 instructions. */
7653 x = force_operand (x, NULL_RTX);
7654 else
7656 /* For the remaining cases, force the constant into a register. */
7657 xop1 = force_reg (SImode, xop1);
7658 x = gen_rtx_PLUS (SImode, xop0, xop1);
7661 else if (GET_CODE (x) == PLUS
7662 && s_register_operand (XEXP (x, 1), SImode)
7663 && !s_register_operand (XEXP (x, 0), SImode))
7665 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7667 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7670 if (flag_pic)
7672 /* We need to find and carefully transform any SYMBOL and LABEL
7673 references; so go back to the original address expression. */
7674 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7676 if (new_x != orig_x)
7677 x = new_x;
7680 return x;
7683 bool
7684 arm_legitimize_reload_address (rtx *p,
7685 enum machine_mode mode,
7686 int opnum, int type,
7687 int ind_levels ATTRIBUTE_UNUSED)
7689 /* We must recognize output that we have already generated ourselves. */
7690 if (GET_CODE (*p) == PLUS
7691 && GET_CODE (XEXP (*p, 0)) == PLUS
7692 && REG_P (XEXP (XEXP (*p, 0), 0))
7693 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7694 && CONST_INT_P (XEXP (*p, 1)))
7696 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7697 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7698 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7699 return true;
7702 if (GET_CODE (*p) == PLUS
7703 && REG_P (XEXP (*p, 0))
7704 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7705 /* If the base register is equivalent to a constant, let the generic
7706 code handle it. Otherwise we will run into problems if a future
7707 reload pass decides to rematerialize the constant. */
7708 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7709 && CONST_INT_P (XEXP (*p, 1)))
7711 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7712 HOST_WIDE_INT low, high;
7714 /* Detect coprocessor load/stores. */
7715 bool coproc_p = ((TARGET_HARD_FLOAT
7716 && TARGET_VFP
7717 && (mode == SFmode || mode == DFmode))
7718 || (TARGET_REALLY_IWMMXT
7719 && VALID_IWMMXT_REG_MODE (mode))
7720 || (TARGET_NEON
7721 && (VALID_NEON_DREG_MODE (mode)
7722 || VALID_NEON_QREG_MODE (mode))));
7724 /* In some cases, bail out when the low two bits of the offset are set. */
7725 if ((val & 0x3) != 0
7726 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7727 && (coproc_p
7728 /* For DI, and DF under soft-float: */
7729 || ((mode == DImode || mode == DFmode)
7730 /* Without ldrd, we use stm/ldm, which does not
7731 fare well with unaligned bits. */
7732 && (! TARGET_LDRD
7733 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7734 || TARGET_THUMB2))))
7735 return false;
7737 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7738 where the (reg+high) part gets turned into a reload add insn,
7739 we try to decompose the index into high/low values that can often
7740 also lead to better reload CSE.
7741 For example:
7742 ldr r0, [r2, #4100] // Offset too large
7743 ldr r1, [r2, #4104] // Offset too large
7745 is best reloaded as:
7746 add t1, r2, #4096
7747 ldr r0, [t1, #4]
7748 add t2, r2, #4096
7749 ldr r1, [t2, #8]
7751 which post-reload CSE can simplify in most cases to eliminate the
7752 second add instruction:
7753 add t1, r2, #4096
7754 ldr r0, [t1, #4]
7755 ldr r1, [t1, #8]
7757 The idea here is that we want to split out the bits of the constant
7758 as a mask, rather than by subtracting the maximum offset that the
7759 respective type of load/store can handle.
7761 A negative low part can still be useful even if
7762 the overall offset is positive; sometimes this may lead to an immediate
7763 that can be constructed with fewer instructions.
7764 For example:
7765 ldr r0, [r2, #0x3FFFFC]
7767 This is best reloaded as:
7768 add t1, r2, #0x400000
7769 ldr r0, [t1, #-4]
7771 The trick for spotting this for a load insn with N bits of offset
7772 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7773 negative offset that is going to make bit N and all the bits below
7774 it become zero in the remainder part.
7776 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7777 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7778 used in most cases of ARM load/store instructions. */
7780 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7781 (((VAL) & ((1 << (N)) - 1)) \
7782 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7783 : 0)
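/* For example, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12) is -4, leaving a
   high part of 0x400000 -- exactly the decomposition shown in the comment
   above. */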
7785 if (coproc_p)
7787 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7789 /* NEON quad-word load/stores are made of two double-word accesses,
7790 so the valid index range is reduced by 8. Treat as 9-bit range if
7791 we go over it. */
7792 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7793 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7795 else if (GET_MODE_SIZE (mode) == 8)
7797 if (TARGET_LDRD)
7798 low = (TARGET_THUMB2
7799 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7800 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7801 else
7802 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7803 to access doublewords. The supported load/store offsets are
7804 -8, -4, and 4, which we try to produce here. */
7805 low = ((val & 0xf) ^ 0x8) - 0x8;
7807 else if (GET_MODE_SIZE (mode) < 8)
7809 /* NEON element load/stores do not have an offset. */
7810 if (TARGET_NEON_FP16 && mode == HFmode)
7811 return false;
7813 if (TARGET_THUMB2)
7815 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7816 Try the wider 12-bit range first, and re-try if the result
7817 is out of range. */
7818 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7819 if (low < -255)
7820 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7822 else
7824 if (mode == HImode || mode == HFmode)
7826 if (arm_arch4)
7827 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7828 else
7830 /* The storehi/movhi_bytes fallbacks can use only
7831 [-4094,+4094] of the full ldrb/strb index range. */
7832 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7833 if (low == 4095 || low == -4095)
7834 return false;
7837 else
7838 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7841 else
7842 return false;
7844 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7845 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7846 - (unsigned HOST_WIDE_INT) 0x80000000);
7847 /* Check for overflow or zero */
7848 if (low == 0 || high == 0 || (high + low != val))
7849 return false;
7851 /* Reload the high part into a base reg; leave the low part
7852 in the mem.
7853 Note that replacing this gen_rtx_PLUS with plus_constant is
7854 wrong in this case because we rely on the
7855 (plus (plus reg c1) c2) structure being preserved so that
7856 XEXP (*p, 0) in push_reload below uses the correct term. */
7857 *p = gen_rtx_PLUS (GET_MODE (*p),
7858 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7859 GEN_INT (high)),
7860 GEN_INT (low));
7861 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7862 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7863 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7864 return true;
7867 return false;
7871 thumb_legitimize_reload_address (rtx *x_p,
7872 enum machine_mode mode,
7873 int opnum, int type,
7874 int ind_levels ATTRIBUTE_UNUSED)
7876 rtx x = *x_p;
7878 if (GET_CODE (x) == PLUS
7879 && GET_MODE_SIZE (mode) < 4
7880 && REG_P (XEXP (x, 0))
7881 && XEXP (x, 0) == stack_pointer_rtx
7882 && CONST_INT_P (XEXP (x, 1))
7883 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7885 rtx orig_x = x;
7887 x = copy_rtx (x);
7888 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7889 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7890 return x;
7893 /* If both registers are hi-regs, then it's better to reload the
7894 entire expression rather than each register individually. That
7895 only requires one reload register rather than two. */
7896 if (GET_CODE (x) == PLUS
7897 && REG_P (XEXP (x, 0))
7898 && REG_P (XEXP (x, 1))
7899 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7900 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7902 rtx orig_x = x;
7904 x = copy_rtx (x);
7905 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7906 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7907 return x;
7910 return NULL;
7913 /* Test for various thread-local symbols. */
7915 /* Helper for arm_tls_referenced_p. */
7917 static int
7918 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7920 if (GET_CODE (*x) == SYMBOL_REF)
7921 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7923 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7924 TLS offsets, not real symbol references. */
7925 if (GET_CODE (*x) == UNSPEC
7926 && XINT (*x, 1) == UNSPEC_TLS)
7927 return -1;
7929 return 0;
7932 /* Return TRUE if X contains any TLS symbol references. */
7934 bool
7935 arm_tls_referenced_p (rtx x)
7937 if (! TARGET_HAVE_TLS)
7938 return false;
7940 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7943 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7945 On the ARM, allow any integer (invalid ones are removed later by insn
7946 patterns), nice doubles and symbol_refs which refer to the function's
7947 constant pool XXX.
7949 When generating PIC code, allow anything. */
7951 static bool
7952 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7954 /* At present, we have no support for Neon structure constants, so forbid
7955 them here. It might be possible to handle simple cases like 0 and -1
7956 in future. */
7957 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7958 return false;
7960 return flag_pic || !label_mentioned_p (x);
7963 static bool
7964 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7966 return (CONST_INT_P (x)
7967 || CONST_DOUBLE_P (x)
7968 || CONSTANT_ADDRESS_P (x)
7969 || flag_pic);
7972 static bool
7973 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7975 return (!arm_cannot_force_const_mem (mode, x)
7976 && (TARGET_32BIT
7977 ? arm_legitimate_constant_p_1 (mode, x)
7978 : thumb_legitimate_constant_p (mode, x)));
7981 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7983 static bool
7984 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7986 rtx base, offset;
7988 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7990 split_const (x, &base, &offset);
7991 if (GET_CODE (base) == SYMBOL_REF
7992 && !offset_within_block_p (base, INTVAL (offset)))
7993 return true;
7995 return arm_tls_referenced_p (x);
7998 #define REG_OR_SUBREG_REG(X) \
7999 (REG_P (X) \
8000 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8002 #define REG_OR_SUBREG_RTX(X) \
8003 (REG_P (X) ? (X) : SUBREG_REG (X))
8005 static inline int
8006 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8008 enum machine_mode mode = GET_MODE (x);
8009 int total, words;
8011 switch (code)
8013 case ASHIFT:
8014 case ASHIFTRT:
8015 case LSHIFTRT:
8016 case ROTATERT:
8017 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8019 case PLUS:
8020 case MINUS:
8021 case COMPARE:
8022 case NEG:
8023 case NOT:
8024 return COSTS_N_INSNS (1);
8026 case MULT:
8027 if (CONST_INT_P (XEXP (x, 1)))
8029 int cycles = 0;
8030 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8032 while (i)
8034 i >>= 2;
8035 cycles++;
8037 return COSTS_N_INSNS (2) + cycles;
8039 return COSTS_N_INSNS (1) + 16;
8041 case SET:
8042 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8043 the mode. */
8044 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8045 return (COSTS_N_INSNS (words)
8046 + 4 * ((MEM_P (SET_SRC (x)))
8047 + MEM_P (SET_DEST (x))));
8049 case CONST_INT:
8050 if (outer == SET)
8052 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8053 return 0;
8054 if (thumb_shiftable_const (INTVAL (x)))
8055 return COSTS_N_INSNS (2);
8056 return COSTS_N_INSNS (3);
8058 else if ((outer == PLUS || outer == COMPARE)
8059 && INTVAL (x) < 256 && INTVAL (x) > -256)
8060 return 0;
8061 else if ((outer == IOR || outer == XOR || outer == AND)
8062 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8063 return COSTS_N_INSNS (1);
8064 else if (outer == AND)
8066 int i;
8067 /* This duplicates the tests in the andsi3 expander. */
8068 for (i = 9; i <= 31; i++)
8069 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8070 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8071 return COSTS_N_INSNS (2);
8073 else if (outer == ASHIFT || outer == ASHIFTRT
8074 || outer == LSHIFTRT)
8075 return 0;
8076 return COSTS_N_INSNS (2);
8078 case CONST:
8079 case CONST_DOUBLE:
8080 case LABEL_REF:
8081 case SYMBOL_REF:
8082 return COSTS_N_INSNS (3);
8084 case UDIV:
8085 case UMOD:
8086 case DIV:
8087 case MOD:
8088 return 100;
8090 case TRUNCATE:
8091 return 99;
8093 case AND:
8094 case XOR:
8095 case IOR:
8096 /* XXX guess. */
8097 return 8;
8099 case MEM:
8100 /* XXX another guess. */
8101 /* Memory costs quite a lot for the first word, but subsequent words
8102 load at the equivalent of a single insn each. */
8103 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8104 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8105 ? 4 : 0));
8107 case IF_THEN_ELSE:
8108 /* XXX a guess. */
8109 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8110 return 14;
8111 return 2;
8113 case SIGN_EXTEND:
8114 case ZERO_EXTEND:
8115 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8116 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8118 if (mode == SImode)
8119 return total;
8121 if (arm_arch6)
8122 return total + COSTS_N_INSNS (1);
8124 /* Assume a two-shift sequence. Increase the cost slightly so
8125 we prefer actual shifts over an extend operation. */
8126 return total + 1 + COSTS_N_INSNS (2);
8128 default:
8129 return 99;
8133 static inline bool
8134 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8136 enum machine_mode mode = GET_MODE (x);
8137 enum rtx_code subcode;
8138 rtx operand;
8139 enum rtx_code code = GET_CODE (x);
8140 *total = 0;
8142 switch (code)
8144 case MEM:
8145 /* Memory costs quite a lot for the first word, but subsequent words
8146 load at the equivalent of a single insn each. */
8147 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8148 return true;
8150 case DIV:
8151 case MOD:
8152 case UDIV:
8153 case UMOD:
8154 if (TARGET_HARD_FLOAT && mode == SFmode)
8155 *total = COSTS_N_INSNS (2);
8156 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8157 *total = COSTS_N_INSNS (4);
8158 else
8159 *total = COSTS_N_INSNS (20);
8160 return false;
8162 case ROTATE:
8163 if (REG_P (XEXP (x, 1)))
8164 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8165 else if (!CONST_INT_P (XEXP (x, 1)))
8166 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8168 /* Fall through */
8169 case ROTATERT:
8170 if (mode != SImode)
8172 *total += COSTS_N_INSNS (4);
8173 return true;
8176 /* Fall through */
8177 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8178 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8179 if (mode == DImode)
8181 *total += COSTS_N_INSNS (3);
8182 return true;
8185 *total += COSTS_N_INSNS (1);
8186 /* Increase the cost of complex shifts because they aren't any faster,
8187 and they reduce dual-issue opportunities. */
8188 if (arm_tune_cortex_a9
8189 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8190 ++*total;
8192 return true;
8194 case MINUS:
8195 if (mode == DImode)
8197 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8198 if (CONST_INT_P (XEXP (x, 0))
8199 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8201 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8202 return true;
8205 if (CONST_INT_P (XEXP (x, 1))
8206 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8208 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8209 return true;
8212 return false;
8215 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8217 if (TARGET_HARD_FLOAT
8218 && (mode == SFmode
8219 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8221 *total = COSTS_N_INSNS (1);
8222 if (CONST_DOUBLE_P (XEXP (x, 0))
8223 && arm_const_double_rtx (XEXP (x, 0)))
8225 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8226 return true;
8229 if (CONST_DOUBLE_P (XEXP (x, 1))
8230 && arm_const_double_rtx (XEXP (x, 1)))
8232 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8233 return true;
8236 return false;
8238 *total = COSTS_N_INSNS (20);
8239 return false;
8242 *total = COSTS_N_INSNS (1);
8243 if (CONST_INT_P (XEXP (x, 0))
8244 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8246 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8247 return true;
8250 subcode = GET_CODE (XEXP (x, 1));
8251 if (subcode == ASHIFT || subcode == ASHIFTRT
8252 || subcode == LSHIFTRT
8253 || subcode == ROTATE || subcode == ROTATERT)
8255 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8256 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8257 return true;
8260 /* A shift as a part of RSB costs no more than RSB itself. */
8261 if (GET_CODE (XEXP (x, 0)) == MULT
8262 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8264 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8265 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8266 return true;
8269 if (subcode == MULT
8270 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8272 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8273 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8274 return true;
8277 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8278 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8280 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8281 if (REG_P (XEXP (XEXP (x, 1), 0))
8282 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8283 *total += COSTS_N_INSNS (1);
8285 return true;
8288 /* Fall through */
8290 case PLUS:
8291 if (code == PLUS && arm_arch6 && mode == SImode
8292 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8293 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8295 *total = COSTS_N_INSNS (1);
8296 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8297 0, speed);
8298 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8299 return true;
8302 /* MLA: All arguments must be registers. We filter out
8303 multiplication by a power of two, so that we fall through to
8304 the code below. */
8305 if (GET_CODE (XEXP (x, 0)) == MULT
8306 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8308 /* The cost comes from the cost of the multiply. */
8309 return false;
8312 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8314 if (TARGET_HARD_FLOAT
8315 && (mode == SFmode
8316 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8318 *total = COSTS_N_INSNS (1);
8319 if (CONST_DOUBLE_P (XEXP (x, 1))
8320 && arm_const_double_rtx (XEXP (x, 1)))
8322 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8323 return true;
8326 return false;
8329 *total = COSTS_N_INSNS (20);
8330 return false;
8333 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8334 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8336 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8337 if (REG_P (XEXP (XEXP (x, 0), 0))
8338 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8339 *total += COSTS_N_INSNS (1);
8340 return true;
8343 /* Fall through */
8345 case AND: case XOR: case IOR:
8347 /* Normally the frame registers will be split into reg+const during
8348 reload, so it is a bad idea to combine them with other instructions,
8349 since then they might not be moved outside of loops. As a compromise
8350 we allow integration with ops that have a constant as their second
8351 operand. */
8352 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8353 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8354 && !CONST_INT_P (XEXP (x, 1)))
8355 *total = COSTS_N_INSNS (1);
8357 if (mode == DImode)
8359 *total += COSTS_N_INSNS (2);
8360 if (CONST_INT_P (XEXP (x, 1))
8361 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8363 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8364 return true;
8367 return false;
8370 *total += COSTS_N_INSNS (1);
8371 if (CONST_INT_P (XEXP (x, 1))
8372 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8374 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8375 return true;
8377 subcode = GET_CODE (XEXP (x, 0));
8378 if (subcode == ASHIFT || subcode == ASHIFTRT
8379 || subcode == LSHIFTRT
8380 || subcode == ROTATE || subcode == ROTATERT)
8382 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8383 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8384 return true;
8387 if (subcode == MULT
8388 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8390 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8391 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8392 return true;
8395 if (subcode == UMIN || subcode == UMAX
8396 || subcode == SMIN || subcode == SMAX)
8398 *total = COSTS_N_INSNS (3);
8399 return true;
8402 return false;
8404 case MULT:
8405 /* This should have been handled by the CPU specific routines. */
8406 gcc_unreachable ();
8408 case TRUNCATE:
8409 if (arm_arch3m && mode == SImode
8410 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8411 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8412 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8413 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8414 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8415 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8417 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8418 return true;
8420 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8421 return false;
8423 case NEG:
8424 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8426 if (TARGET_HARD_FLOAT
8427 && (mode == SFmode
8428 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8430 *total = COSTS_N_INSNS (1);
8431 return false;
8433 *total = COSTS_N_INSNS (2);
8434 return false;
8437 /* Fall through */
8438 case NOT:
8439 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8440 if (mode == SImode && code == NOT)
8442 subcode = GET_CODE (XEXP (x, 0));
8443 if (subcode == ASHIFT || subcode == ASHIFTRT
8444 || subcode == LSHIFTRT
8445 || subcode == ROTATE || subcode == ROTATERT
8446 || (subcode == MULT
8447 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8449 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8450 /* Register shifts cost an extra cycle. */
8451 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8452 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8453 subcode, 1, speed);
8454 return true;
8458 return false;
8460 case IF_THEN_ELSE:
8461 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8463 *total = COSTS_N_INSNS (4);
8464 return true;
8467 operand = XEXP (x, 0);
8469 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8470 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8471 && REG_P (XEXP (operand, 0))
8472 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8473 *total += COSTS_N_INSNS (1);
8474 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8475 + rtx_cost (XEXP (x, 2), code, 2, speed));
8476 return true;
8478 case NE:
8479 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8481 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8482 return true;
8484 goto scc_insn;
8486 case GE:
8487 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8488 && mode == SImode && XEXP (x, 1) == const0_rtx)
8490 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8491 return true;
8493 goto scc_insn;
8495 case LT:
8496 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8497 && mode == SImode && XEXP (x, 1) == const0_rtx)
8499 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8500 return true;
8502 goto scc_insn;
8504 case EQ:
8505 case GT:
8506 case LE:
8507 case GEU:
8508 case LTU:
8509 case GTU:
8510 case LEU:
8511 case UNORDERED:
8512 case ORDERED:
8513 case UNEQ:
8514 case UNGE:
8515 case UNLT:
8516 case UNGT:
8517 case UNLE:
8518 scc_insn:
8519 /* SCC insns. If the comparison has already been performed, they
8520 cost 2 instructions. Otherwise they need an additional comparison
8521 before them. */
8522 *total = COSTS_N_INSNS (2);
8523 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8525 return true;
8528 /* Fall through */
8529 case COMPARE:
8530 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8532 *total = 0;
8533 return true;
8536 *total += COSTS_N_INSNS (1);
8537 if (CONST_INT_P (XEXP (x, 1))
8538 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8540 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8541 return true;
8544 subcode = GET_CODE (XEXP (x, 0));
8545 if (subcode == ASHIFT || subcode == ASHIFTRT
8546 || subcode == LSHIFTRT
8547 || subcode == ROTATE || subcode == ROTATERT)
8549 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8550 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8551 return true;
8554 if (subcode == MULT
8555 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8557 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8558 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8559 return true;
8562 return false;
8564 case UMIN:
8565 case UMAX:
8566 case SMIN:
8567 case SMAX:
8568 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8569 if (!CONST_INT_P (XEXP (x, 1))
8570 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8571 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8572 return true;
8574 case ABS:
8575 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8577 if (TARGET_HARD_FLOAT
8578 && (mode == SFmode
8579 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8581 *total = COSTS_N_INSNS (1);
8582 return false;
8584 *total = COSTS_N_INSNS (20);
8585 return false;
8587 *total = COSTS_N_INSNS (1);
8588 if (mode == DImode)
8589 *total += COSTS_N_INSNS (3);
8590 return false;
8592 case SIGN_EXTEND:
8593 case ZERO_EXTEND:
8594 *total = 0;
8595 if (GET_MODE_CLASS (mode) == MODE_INT)
8597 rtx op = XEXP (x, 0);
8598 enum machine_mode opmode = GET_MODE (op);
8600 if (mode == DImode)
8601 *total += COSTS_N_INSNS (1);
8603 if (opmode != SImode)
8605 if (MEM_P (op))
8607 /* If !arm_arch4, we use one of the extendhisi2_mem
8608 or movhi_bytes patterns for HImode. For a QImode
8609 sign extension, we first zero-extend from memory
8610 and then perform a shift sequence. */
8611 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8612 *total += COSTS_N_INSNS (2);
8614 else if (arm_arch6)
8615 *total += COSTS_N_INSNS (1);
8617 /* We don't have the necessary insn, so we need to perform some
8618 other operation. */
8619 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8620 /* An and with constant 255. */
8621 *total += COSTS_N_INSNS (1);
8622 else
8623 /* A shift sequence. Increase costs slightly to avoid
8624 combining two shifts into an extend operation. */
8625 *total += COSTS_N_INSNS (2) + 1;
8628 return false;
8631 switch (GET_MODE (XEXP (x, 0)))
8633 case V8QImode:
8634 case V4HImode:
8635 case V2SImode:
8636 case V4QImode:
8637 case V2HImode:
8638 *total = COSTS_N_INSNS (1);
8639 return false;
8641 default:
8642 gcc_unreachable ();
8644 gcc_unreachable ();
8646 case ZERO_EXTRACT:
8647 case SIGN_EXTRACT:
8648 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8649 return true;
8651 case CONST_INT:
8652 if (const_ok_for_arm (INTVAL (x))
8653 || const_ok_for_arm (~INTVAL (x)))
8654 *total = COSTS_N_INSNS (1);
8655 else
8656 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8657 INTVAL (x), NULL_RTX,
8658 NULL_RTX, 0, 0));
8659 return true;
8661 case CONST:
8662 case LABEL_REF:
8663 case SYMBOL_REF:
8664 *total = COSTS_N_INSNS (3);
8665 return true;
8667 case HIGH:
8668 *total = COSTS_N_INSNS (1);
8669 return true;
8671 case LO_SUM:
8672 *total = COSTS_N_INSNS (1);
8673 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8674 return true;
8676 case CONST_DOUBLE:
8677 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8678 && (mode == SFmode || !TARGET_VFP_SINGLE))
8679 *total = COSTS_N_INSNS (1);
8680 else
8681 *total = COSTS_N_INSNS (4);
8682 return true;
8684 case SET:
8685 /* The vec_extract patterns accept memory operands that require an
8686 address reload. Account for the cost of that reload to give the
8687 auto-inc-dec pass an incentive to try to replace them. */
8688 if (TARGET_NEON && MEM_P (SET_DEST (x))
8689 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8691 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8692 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8693 *total += COSTS_N_INSNS (1);
8694 return true;
8696 /* Likewise for the vec_set patterns. */
8697 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8698 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8699 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8701 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8702 *total = rtx_cost (mem, code, 0, speed);
8703 if (!neon_vector_mem_operand (mem, 2, true))
8704 *total += COSTS_N_INSNS (1);
8705 return true;
8707 return false;
8709 case UNSPEC:
8710 /* We make this as expensive as a memory access so that it can
8711 be hoisted out of loops. */
8712 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8714 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8716 return true;
8718 case CONST_VECTOR:
8719 if (TARGET_NEON
8720 && TARGET_HARD_FLOAT
8721 && outer == SET
8722 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8723 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8724 *total = COSTS_N_INSNS (1);
8725 else
8726 *total = COSTS_N_INSNS (4);
8727 return true;
8729 default:
8730 *total = COSTS_N_INSNS (4);
8731 return false;
8735 /* Estimates the size cost of thumb1 instructions.
8736 For now most of the code is copied from thumb1_rtx_costs. We need more
8737 fine-grained tuning when we have more related test cases. */
8738 static inline int
8739 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8741 enum machine_mode mode = GET_MODE (x);
8742 int words;
8744 switch (code)
8746 case ASHIFT:
8747 case ASHIFTRT:
8748 case LSHIFTRT:
8749 case ROTATERT:
8750 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8752 case PLUS:
8753 case MINUS:
8754 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8755 patterns generated by RTL expansion, especially for the expansion of
8756 multiplication. */
8757 if ((GET_CODE (XEXP (x, 0)) == MULT
8758 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8759 || (GET_CODE (XEXP (x, 1)) == MULT
8760 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8761 return COSTS_N_INSNS (2);
8762 /* Deliberately fall through for normal RTXs. */
8763 case COMPARE:
8764 case NEG:
8765 case NOT:
8766 return COSTS_N_INSNS (1);
8768 case MULT:
8769 if (CONST_INT_P (XEXP (x, 1)))
8771 /* The Thumb-1 mul instruction can't operate on a constant; we must
8772 load it into a register first. */
8773 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8774 return COSTS_N_INSNS (1) + const_size;
8776 return COSTS_N_INSNS (1);
8778 case SET:
8779 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8780 the mode. */
8781 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8782 return (COSTS_N_INSNS (words)
8783 + 4 * ((MEM_P (SET_SRC (x)))
8784 + MEM_P (SET_DEST (x))));
8786 case CONST_INT:
8787 if (outer == SET)
8789 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8790 return COSTS_N_INSNS (1);
8791 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8792 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8793 return COSTS_N_INSNS (2);
8794 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8795 if (thumb_shiftable_const (INTVAL (x)))
8796 return COSTS_N_INSNS (2);
8797 return COSTS_N_INSNS (3);
8799 else if ((outer == PLUS || outer == COMPARE)
8800 && INTVAL (x) < 256 && INTVAL (x) > -256)
8801 return 0;
8802 else if ((outer == IOR || outer == XOR || outer == AND)
8803 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8804 return COSTS_N_INSNS (1);
8805 else if (outer == AND)
8807 int i;
8808 /* This duplicates the tests in the andsi3 expander. */
8809 for (i = 9; i <= 31; i++)
8810 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8811 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8812 return COSTS_N_INSNS (2);
8814 else if (outer == ASHIFT || outer == ASHIFTRT
8815 || outer == LSHIFTRT)
8816 return 0;
8817 return COSTS_N_INSNS (2);
8819 case CONST:
8820 case CONST_DOUBLE:
8821 case LABEL_REF:
8822 case SYMBOL_REF:
8823 return COSTS_N_INSNS (3);
8825 case UDIV:
8826 case UMOD:
8827 case DIV:
8828 case MOD:
8829 return 100;
8831 case TRUNCATE:
8832 return 99;
8834 case AND:
8835 case XOR:
8836 case IOR:
8837 /* XXX guess. */
8838 return 8;
8840 case MEM:
8841 /* XXX another guess. */
8842 /* Memory costs quite a lot for the first word, but subsequent words
8843 load at the equivalent of a single insn each. */
8844 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8845 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8846 ? 4 : 0));
8848 case IF_THEN_ELSE:
8849 /* XXX a guess. */
8850 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8851 return 14;
8852 return 2;
8854 case ZERO_EXTEND:
8855 /* XXX still guessing. */
8856 switch (GET_MODE (XEXP (x, 0)))
8858 case QImode:
8859 return (1 + (mode == DImode ? 4 : 0)
8860 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8862 case HImode:
8863 return (4 + (mode == DImode ? 4 : 0)
8864 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8866 case SImode:
8867 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8869 default:
8870 return 99;
8873 default:
8874 return 99;
8878 /* RTX costs when optimizing for size. */
8879 static bool
8880 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8881 int *total)
8883 enum machine_mode mode = GET_MODE (x);
8884 if (TARGET_THUMB1)
8886 *total = thumb1_size_rtx_costs (x, code, outer_code);
8887 return true;
8890 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8891 switch (code)
8893 case MEM:
8894 /* A memory access costs 1 insn if the mode is small or the address is
8895 a single register; otherwise it costs one insn per word. */
8896 if (REG_P (XEXP (x, 0)))
8897 *total = COSTS_N_INSNS (1);
8898 else if (flag_pic
8899 && GET_CODE (XEXP (x, 0)) == PLUS
8900 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8901 /* This will be split into two instructions.
8902 See arm.md:calculate_pic_address. */
8903 *total = COSTS_N_INSNS (2);
8904 else
8905 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8906 return true;
8908 case DIV:
8909 case MOD:
8910 case UDIV:
8911 case UMOD:
8912 /* Needs a libcall, so it costs about this. */
8913 *total = COSTS_N_INSNS (2);
8914 return false;
8916 case ROTATE:
8917 if (mode == SImode && REG_P (XEXP (x, 1)))
8919 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8920 return true;
8922 /* Fall through */
8923 case ROTATERT:
8924 case ASHIFT:
8925 case LSHIFTRT:
8926 case ASHIFTRT:
8927 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8929 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8930 return true;
8932 else if (mode == SImode)
8934 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8935 /* Slightly disparage register shifts, but not by much. */
8936 if (!CONST_INT_P (XEXP (x, 1)))
8937 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8938 return true;
8941 /* Needs a libcall. */
8942 *total = COSTS_N_INSNS (2);
8943 return false;
8945 case MINUS:
8946 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8947 && (mode == SFmode || !TARGET_VFP_SINGLE))
8949 *total = COSTS_N_INSNS (1);
8950 return false;
8953 if (mode == SImode)
8955 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8956 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8958 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8959 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8960 || subcode1 == ROTATE || subcode1 == ROTATERT
8961 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8962 || subcode1 == ASHIFTRT)
8964 /* It's just the cost of the two operands. */
8965 *total = 0;
8966 return false;
8969 *total = COSTS_N_INSNS (1);
8970 return false;
8973 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8974 return false;
8976 case PLUS:
8977 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8978 && (mode == SFmode || !TARGET_VFP_SINGLE))
8980 *total = COSTS_N_INSNS (1);
8981 return false;
8984 /* A shift as a part of ADD costs nothing. */
8985 if (GET_CODE (XEXP (x, 0)) == MULT
8986 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8988 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8989 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8990 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8991 return true;
8994 /* Fall through */
8995 case AND: case XOR: case IOR:
8996 if (mode == SImode)
8998 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9000 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9001 || subcode == LSHIFTRT || subcode == ASHIFTRT
9002 || (code == AND && subcode == NOT))
9004 /* It's just the cost of the two operands. */
9005 *total = 0;
9006 return false;
9010 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9011 return false;
9013 case MULT:
9014 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9015 return false;
9017 case NEG:
9018 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9019 && (mode == SFmode || !TARGET_VFP_SINGLE))
9021 *total = COSTS_N_INSNS (1);
9022 return false;
9025 /* Fall through */
9026 case NOT:
9027 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9029 return false;
9031 case IF_THEN_ELSE:
9032 *total = 0;
9033 return false;
9035 case COMPARE:
9036 if (cc_register (XEXP (x, 0), VOIDmode))
9037 *total = 0;
9038 else
9039 *total = COSTS_N_INSNS (1);
9040 return false;
9042 case ABS:
9043 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9044 && (mode == SFmode || !TARGET_VFP_SINGLE))
9045 *total = COSTS_N_INSNS (1);
9046 else
9047 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9048 return false;
9050 case SIGN_EXTEND:
9051 case ZERO_EXTEND:
9052 return arm_rtx_costs_1 (x, outer_code, total, 0);
9054 case CONST_INT:
9055 if (const_ok_for_arm (INTVAL (x)))
9056 /* A multiplication by a constant requires another instruction
9057 to load the constant to a register. */
9058 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9059 ? 1 : 0);
9060 else if (const_ok_for_arm (~INTVAL (x)))
9061 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9062 else if (const_ok_for_arm (-INTVAL (x)))
9064 if (outer_code == COMPARE || outer_code == PLUS
9065 || outer_code == MINUS)
9066 *total = 0;
9067 else
9068 *total = COSTS_N_INSNS (1);
9070 else
9071 *total = COSTS_N_INSNS (2);
9072 return true;
9074 case CONST:
9075 case LABEL_REF:
9076 case SYMBOL_REF:
9077 *total = COSTS_N_INSNS (2);
9078 return true;
9080 case CONST_DOUBLE:
9081 *total = COSTS_N_INSNS (4);
9082 return true;
9084 case CONST_VECTOR:
9085 if (TARGET_NEON
9086 && TARGET_HARD_FLOAT
9087 && outer_code == SET
9088 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9089 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9090 *total = COSTS_N_INSNS (1);
9091 else
9092 *total = COSTS_N_INSNS (4);
9093 return true;
9095 case HIGH:
9096 case LO_SUM:
9097 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9098 cost of these slightly. */
9099 *total = COSTS_N_INSNS (1) + 1;
9100 return true;
9102 case SET:
9103 return false;
9105 default:
9106 if (mode != VOIDmode)
9107 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9108 else
9109 *total = COSTS_N_INSNS (4); /* Who knows? */
9110 return false;
9114 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9115 operand, then return the operand that is being shifted. If the shift
9116 is not by a constant, then set SHIFT_REG to point to the operand.
9117 Return NULL if OP is not a shifter operand. */
9118 static rtx
9119 shifter_op_p (rtx op, rtx *shift_reg)
9121 enum rtx_code code = GET_CODE (op);
9123 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9124 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9125 return XEXP (op, 0);
9126 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9127 return XEXP (op, 0);
9128 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9129 || code == ASHIFTRT)
9131 if (!CONST_INT_P (XEXP (op, 1)))
9132 *shift_reg = XEXP (op, 1);
9133 return XEXP (op, 0);
9136 return NULL;
9139 static bool
9140 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9142 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9143 gcc_assert (GET_CODE (x) == UNSPEC);
9145 switch (XINT (x, 1))
9147 case UNSPEC_UNALIGNED_LOAD:
9148 /* We can only do unaligned loads into the integer unit, and we can't
9149 use LDM or LDRD. */
9150 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9151 if (speed_p)
9152 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9153 + extra_cost->ldst.load_unaligned);
9155 #ifdef NOT_YET
9156 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9157 ADDR_SPACE_GENERIC, speed_p);
9158 #endif
9159 return true;
9161 case UNSPEC_UNALIGNED_STORE:
9162 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9163 if (speed_p)
9164 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9165 + extra_cost->ldst.store_unaligned);
9167 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9168 #ifdef NOT_YET
9169 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9170 ADDR_SPACE_GENERIC, speed_p);
9171 #endif
9172 return true;
9174 case UNSPEC_VRINTZ:
9175 case UNSPEC_VRINTP:
9176 case UNSPEC_VRINTM:
9177 case UNSPEC_VRINTR:
9178 case UNSPEC_VRINTX:
9179 case UNSPEC_VRINTA:
9180 *cost = COSTS_N_INSNS (1);
9181 if (speed_p)
9182 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9184 return true;
9185 default:
9186 *cost = COSTS_N_INSNS (2);
9187 break;
9189 return false;
9192 /* Cost of a libcall. We assume one insn per argument, an amount for the
9193 call (one insn for -Os) and then one for processing the result. */
9194 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
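/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and COSTS_N_INSNS (4) when optimizing for size. */

/* If operand IDX of X is a left shift (including a multiply by a power of
   two), the macro below adds the extra cost of a shifted arithmetic
   operation (when costing for speed) plus the costs of its operands, and
   returns true from the enclosing cost function. */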
9196 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9197 do \
9199 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9200 if (shift_op != NULL \
9201 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9203 if (shift_reg) \
9205 if (speed_p) \
9206 *cost += extra_cost->alu.arith_shift_reg; \
9207 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9209 else if (speed_p) \
9210 *cost += extra_cost->alu.arith_shift; \
9212 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9213 + rtx_cost (XEXP (x, 1 - IDX), \
9214 OP, 1, speed_p)); \
9215 return true; \
9218 while (0);
9220 /* RTX costs. Make an estimate of the cost of executing the operation
9221 X, which is contained within an operation with code OUTER_CODE.
9222 SPEED_P indicates whether the cost desired is the performance cost,
9223 or the size cost. The estimate is stored in COST and the return
9224 value is TRUE if the cost calculation is final, or FALSE if the
9225 caller should recurse through the operands of X to add additional
9226 costs.
9228 We currently make no attempt to model the size savings of Thumb-2
9229 16-bit instructions. At the normal points in compilation where
9230 this code is called we have no measure of whether the condition
9231 flags are live or not, and thus no realistic way to determine what
9232 the size will eventually be. */
9233 static bool
9234 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9235 const struct cpu_cost_table *extra_cost,
9236 int *cost, bool speed_p)
9238 enum machine_mode mode = GET_MODE (x);
9240 if (TARGET_THUMB1)
9242 if (speed_p)
9243 *cost = thumb1_rtx_costs (x, code, outer_code);
9244 else
9245 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9246 return true;
9249 switch (code)
9251 case SET:
9252 *cost = 0;
9253 /* SET RTXs don't have a mode so we get it from the destination. */
9254 mode = GET_MODE (SET_DEST (x));
9256 if (REG_P (SET_SRC (x))
9257 && REG_P (SET_DEST (x)))
9259 /* Assume that most copies can be done with a single insn,
9260 unless we don't have HW FP, in which case everything
9261 larger than word mode will require two insns. */
9262 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9263 && GET_MODE_SIZE (mode) > 4)
9264 || mode == DImode)
9265 ? 2 : 1);
9266 /* Conditional register moves can be encoded
9267 in 16 bits in Thumb mode. */
9268 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9269 *cost >>= 1;
9271 return true;
9274 if (CONST_INT_P (SET_SRC (x)))
9276 /* Handle CONST_INT here, since the value doesn't have a mode
9277 and we would otherwise be unable to work out the true cost. */
9278 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9279 outer_code = SET;
9280 /* Slightly lower the cost of setting a core reg to a constant.
9281 This helps break up chains and allows for better scheduling. */
9282 if (REG_P (SET_DEST (x))
9283 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9284 *cost -= 1;
9285 x = SET_SRC (x);
9286 /* Immediate moves with an immediate in the range [0, 255] can be
9287 encoded in 16 bits in Thumb mode. */
9288 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9289 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9290 *cost >>= 1;
9291 goto const_int_cost;
9294 return false;
9296 case MEM:
9297 /* A memory access costs 1 insn if the mode is small or the address is
9298 a single register; otherwise it costs one insn per word. */
9299 if (REG_P (XEXP (x, 0)))
9300 *cost = COSTS_N_INSNS (1);
9301 else if (flag_pic
9302 && GET_CODE (XEXP (x, 0)) == PLUS
9303 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9304 /* This will be split into two instructions.
9305 See arm.md:calculate_pic_address. */
9306 *cost = COSTS_N_INSNS (2);
9307 else
9308 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9310 /* For speed optimizations, add the costs of the address and
9311 accessing memory. */
9312 if (speed_p)
9313 #ifdef NOT_YET
9314 *cost += (extra_cost->ldst.load
9315 + arm_address_cost (XEXP (x, 0), mode,
9316 ADDR_SPACE_GENERIC, speed_p));
9317 #else
9318 *cost += extra_cost->ldst.load;
9319 #endif
9320 return true;
9322 case PARALLEL:
9324 /* Calculations of LDM costs are complex. We assume an initial cost
9325 (ldm_1st) which will load the number of registers mentioned in
9326 ldm_regs_per_insn_1st registers; then each additional
9327 ldm_regs_per_insn_subsequent registers cost one more insn. The
9328 formula for N regs is thus:
9330 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9331 + ldm_regs_per_insn_subsequent - 1)
9332 / ldm_regs_per_insn_subsequent).
9334 Additional costs may also be added for addressing. A similar
9335 formula is used for STM. */
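/* For example, if ldm_regs_per_insn_1st were 3 and
   ldm_regs_per_insn_subsequent were 2, an 8-register LDM would cost
   ldm_1st + COSTS_N_INSNS ((8 - 3 + 2 - 1) / 2) = ldm_1st + COSTS_N_INSNS (3). */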
9337 bool is_ldm = load_multiple_operation (x, SImode);
9338 bool is_stm = store_multiple_operation (x, SImode);
9340 *cost = COSTS_N_INSNS (1);
9342 if (is_ldm || is_stm)
9344 if (speed_p)
9346 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9347 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9348 ? extra_cost->ldst.ldm_regs_per_insn_1st
9349 : extra_cost->ldst.stm_regs_per_insn_1st;
9350 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9351 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9352 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9354 *cost += regs_per_insn_1st
9355 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9356 + regs_per_insn_sub - 1)
9357 / regs_per_insn_sub);
9358 return true;
9362 return false;
9364 case DIV:
9365 case UDIV:
9366 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9367 && (mode == SFmode || !TARGET_VFP_SINGLE))
9368 *cost = COSTS_N_INSNS (speed_p
9369 ? extra_cost->fp[mode != SFmode].div : 1);
9370 else if (mode == SImode && TARGET_IDIV)
9371 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9372 else
9373 *cost = LIBCALL_COST (2);
9374 return false; /* All arguments must be in registers. */
9376 case MOD:
9377 case UMOD:
9378 *cost = LIBCALL_COST (2);
9379 return false; /* All arguments must be in registers. */
9381 case ROTATE:
9382 if (mode == SImode && REG_P (XEXP (x, 1)))
9384 *cost = (COSTS_N_INSNS (2)
9385 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9386 if (speed_p)
9387 *cost += extra_cost->alu.shift_reg;
9388 return true;
9390 /* Fall through */
9391 case ROTATERT:
9392 case ASHIFT:
9393 case LSHIFTRT:
9394 case ASHIFTRT:
9395 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9397 *cost = (COSTS_N_INSNS (3)
9398 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9399 if (speed_p)
9400 *cost += 2 * extra_cost->alu.shift;
9401 return true;
9403 else if (mode == SImode)
9405 *cost = (COSTS_N_INSNS (1)
9406 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9407 /* Slightly disparage register shifts at -Os, but not by much. */
9408 if (!CONST_INT_P (XEXP (x, 1)))
9409 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9410 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9411 return true;
9413 else if (GET_MODE_CLASS (mode) == MODE_INT
9414 && GET_MODE_SIZE (mode) < 4)
9416 if (code == ASHIFT)
9418 *cost = (COSTS_N_INSNS (1)
9419 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9420 /* Slightly disparage register shifts at -Os, but not by
9421 much. */
9422 if (!CONST_INT_P (XEXP (x, 1)))
9423 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9424 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9426 else if (code == LSHIFTRT || code == ASHIFTRT)
9428 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9430 /* Can use SBFX/UBFX. */
9431 *cost = COSTS_N_INSNS (1);
9432 if (speed_p)
9433 *cost += extra_cost->alu.bfx;
9434 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9436 else
9438 *cost = COSTS_N_INSNS (2);
9439 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9440 if (speed_p)
9442 if (CONST_INT_P (XEXP (x, 1)))
9443 *cost += 2 * extra_cost->alu.shift;
9444 else
9445 *cost += (extra_cost->alu.shift
9446 + extra_cost->alu.shift_reg);
9448 else
9449 /* Slightly disparage register shifts. */
9450 *cost += !CONST_INT_P (XEXP (x, 1));
9453 else /* Rotates. */
9455 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9456 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9457 if (speed_p)
9459 if (CONST_INT_P (XEXP (x, 1)))
9460 *cost += (2 * extra_cost->alu.shift
9461 + extra_cost->alu.log_shift);
9462 else
9463 *cost += (extra_cost->alu.shift
9464 + extra_cost->alu.shift_reg
9465 + extra_cost->alu.log_shift_reg);
9468 return true;
9471 *cost = LIBCALL_COST (2);
9472 return false;
9474 case BSWAP:
9475 if (arm_arch6)
9477 if (mode == SImode)
9479 *cost = COSTS_N_INSNS (1);
9480 if (speed_p)
9481 *cost += extra_cost->alu.rev;
9483 return false;
9486 else
9488 /* No rev instruction available. Look at arm_legacy_rev
9489 and thumb_legacy_rev for the form of RTL used then. */
9490 if (TARGET_THUMB)
9492 *cost = COSTS_N_INSNS (10);
9494 if (speed_p)
9496 *cost += 6 * extra_cost->alu.shift;
9497 *cost += 3 * extra_cost->alu.logical;
9500 else
9502 *cost = COSTS_N_INSNS (5);
9504 if (speed_p)
9506 *cost += 2 * extra_cost->alu.shift;
9507 *cost += extra_cost->alu.arith_shift;
9508 *cost += 2 * extra_cost->alu.logical;
9511 return true;
9513 return false;
9515 case MINUS:
9516 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9517 && (mode == SFmode || !TARGET_VFP_SINGLE))
9519 *cost = COSTS_N_INSNS (1);
9520 if (GET_CODE (XEXP (x, 0)) == MULT
9521 || GET_CODE (XEXP (x, 1)) == MULT)
9523 rtx mul_op0, mul_op1, sub_op;
9525 if (speed_p)
9526 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9528 if (GET_CODE (XEXP (x, 0)) == MULT)
9530 mul_op0 = XEXP (XEXP (x, 0), 0);
9531 mul_op1 = XEXP (XEXP (x, 0), 1);
9532 sub_op = XEXP (x, 1);
9534 else
9536 mul_op0 = XEXP (XEXP (x, 1), 0);
9537 mul_op1 = XEXP (XEXP (x, 1), 1);
9538 sub_op = XEXP (x, 0);
9541 /* The first operand of the multiply may be optionally
9542 negated. */
9543 if (GET_CODE (mul_op0) == NEG)
9544 mul_op0 = XEXP (mul_op0, 0);
9546 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9547 + rtx_cost (mul_op1, code, 0, speed_p)
9548 + rtx_cost (sub_op, code, 0, speed_p));
9550 return true;
9553 if (speed_p)
9554 *cost += extra_cost->fp[mode != SFmode].addsub;
9555 return false;
9558 if (mode == SImode)
9560 rtx shift_by_reg = NULL;
9561 rtx shift_op;
9562 rtx non_shift_op;
9564 *cost = COSTS_N_INSNS (1);
9566 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9567 if (shift_op == NULL)
9569 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9570 non_shift_op = XEXP (x, 0);
9572 else
9573 non_shift_op = XEXP (x, 1);
9575 if (shift_op != NULL)
9577 if (shift_by_reg != NULL)
9579 if (speed_p)
9580 *cost += extra_cost->alu.arith_shift_reg;
9581 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9583 else if (speed_p)
9584 *cost += extra_cost->alu.arith_shift;
9586 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9587 + rtx_cost (non_shift_op, code, 0, speed_p));
9588 return true;
9591 if (arm_arch_thumb2
9592 && GET_CODE (XEXP (x, 1)) == MULT)
9594 /* MLS. */
9595 if (speed_p)
9596 *cost += extra_cost->mult[0].add;
9597 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9598 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9599 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9600 return true;
9603 if (CONST_INT_P (XEXP (x, 0)))
9605 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9606 INTVAL (XEXP (x, 0)), NULL_RTX,
9607 NULL_RTX, 1, 0);
9608 *cost = COSTS_N_INSNS (insns);
9609 if (speed_p)
9610 *cost += insns * extra_cost->alu.arith;
9611 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9612 return true;
9615 return false;
9618 if (GET_MODE_CLASS (mode) == MODE_INT
9619 && GET_MODE_SIZE (mode) < 4)
9621 rtx shift_op, shift_reg;
9622 shift_reg = NULL;
9624 /* We check both sides of the MINUS for shifter operands since,
9625 unlike PLUS, it's not commutative. */
9627 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9628 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9630 /* Slightly disparage, as we might need to widen the result. */
9631 *cost = 1 + COSTS_N_INSNS (1);
9632 if (speed_p)
9633 *cost += extra_cost->alu.arith;
9635 if (CONST_INT_P (XEXP (x, 0)))
9637 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9638 return true;
9641 return false;
9644 if (mode == DImode)
9646 *cost = COSTS_N_INSNS (2);
9648 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9650 rtx op1 = XEXP (x, 1);
9652 if (speed_p)
9653 *cost += 2 * extra_cost->alu.arith;
9655 if (GET_CODE (op1) == ZERO_EXTEND)
9656 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9657 else
9658 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9659 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9660 0, speed_p);
9661 return true;
9663 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9665 if (speed_p)
9666 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9667 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9668 0, speed_p)
9669 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9670 return true;
9672 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9673 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9675 if (speed_p)
9676 *cost += (extra_cost->alu.arith
9677 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9678 ? extra_cost->alu.arith
9679 : extra_cost->alu.arith_shift));
9680 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9681 + rtx_cost (XEXP (XEXP (x, 1), 0),
9682 GET_CODE (XEXP (x, 1)), 0, speed_p));
9683 return true;
9686 if (speed_p)
9687 *cost += 2 * extra_cost->alu.arith;
9688 return false;
9691 /* Vector mode? */
9693 *cost = LIBCALL_COST (2);
9694 return false;
9696 case PLUS:
9697 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9698 && (mode == SFmode || !TARGET_VFP_SINGLE))
9700 *cost = COSTS_N_INSNS (1);
9701 if (GET_CODE (XEXP (x, 0)) == MULT)
9703 rtx mul_op0, mul_op1, add_op;
9705 if (speed_p)
9706 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9708 mul_op0 = XEXP (XEXP (x, 0), 0);
9709 mul_op1 = XEXP (XEXP (x, 0), 1);
9710 add_op = XEXP (x, 1);
9712 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9713 + rtx_cost (mul_op1, code, 0, speed_p)
9714 + rtx_cost (add_op, code, 0, speed_p));
9716 return true;
9719 if (speed_p)
9720 *cost += extra_cost->fp[mode != SFmode].addsub;
9721 return false;
9723 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9725 *cost = LIBCALL_COST (2);
9726 return false;
9729 /* Narrow modes can be synthesized in SImode, but the range
9730 of useful sub-operations is limited. Check for shift operations
9731 on one of the operands. Only left shifts can be used in the
9732 narrow modes. */
9733 if (GET_MODE_CLASS (mode) == MODE_INT
9734 && GET_MODE_SIZE (mode) < 4)
9736 rtx shift_op, shift_reg;
9737 shift_reg = NULL;
9739 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9741 if (CONST_INT_P (XEXP (x, 1)))
9743 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9744 INTVAL (XEXP (x, 1)), NULL_RTX,
9745 NULL_RTX, 1, 0);
9746 *cost = COSTS_N_INSNS (insns);
9747 if (speed_p)
9748 *cost += insns * extra_cost->alu.arith;
9749 /* Slightly penalize a narrow operation as the result may
9750 need widening. */
9751 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9752 return true;
9755 /* Slightly penalize a narrow operation as the result may
9756 need widening. */
9757 *cost = 1 + COSTS_N_INSNS (1);
9758 if (speed_p)
9759 *cost += extra_cost->alu.arith;
9761 return false;
9764 if (mode == SImode)
9766 rtx shift_op, shift_reg;
9768 *cost = COSTS_N_INSNS (1);
9769 if (TARGET_INT_SIMD
9770 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9771 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9773 /* UXTA[BH] or SXTA[BH]. */
9774 if (speed_p)
9775 *cost += extra_cost->alu.extend_arith;
9776 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9777 speed_p)
9778 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9779 return true;
9782 shift_reg = NULL;
9783 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9784 if (shift_op != NULL)
9786 if (shift_reg)
9788 if (speed_p)
9789 *cost += extra_cost->alu.arith_shift_reg;
9790 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9792 else if (speed_p)
9793 *cost += extra_cost->alu.arith_shift;
9795 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9796 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9797 return true;
9799 if (GET_CODE (XEXP (x, 0)) == MULT)
9801 rtx mul_op = XEXP (x, 0);
9803 *cost = COSTS_N_INSNS (1);
9805 if (TARGET_DSP_MULTIPLY
9806 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9807 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9808 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9809 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9810 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9811 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9812 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9813 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9814 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9815 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9816 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9817 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9818 == 16))))))
9820 /* SMLA[BT][BT]. */
9821 if (speed_p)
9822 *cost += extra_cost->mult[0].extend_add;
9823 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9824 SIGN_EXTEND, 0, speed_p)
9825 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9826 SIGN_EXTEND, 0, speed_p)
9827 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9828 return true;
9831 if (speed_p)
9832 *cost += extra_cost->mult[0].add;
9833 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9834 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9835 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9836 return true;
9838 if (CONST_INT_P (XEXP (x, 1)))
9840 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9841 INTVAL (XEXP (x, 1)), NULL_RTX,
9842 NULL_RTX, 1, 0);
9843 *cost = COSTS_N_INSNS (insns);
9844 if (speed_p)
9845 *cost += insns * extra_cost->alu.arith;
9846 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9847 return true;
9849 return false;
9852 if (mode == DImode)
9854 if (arm_arch3m
9855 && GET_CODE (XEXP (x, 0)) == MULT
9856 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9857 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9858 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9859 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9861 *cost = COSTS_N_INSNS (1);
9862 if (speed_p)
9863 *cost += extra_cost->mult[1].extend_add;
9864 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9865 ZERO_EXTEND, 0, speed_p)
9866 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9867 ZERO_EXTEND, 0, speed_p)
9868 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9869 return true;
9872 *cost = COSTS_N_INSNS (2);
9874 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9875 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9877 if (speed_p)
9878 *cost += (extra_cost->alu.arith
9879 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9880 ? extra_cost->alu.arith
9881 : extra_cost->alu.arith_shift));
9883 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9884 speed_p)
9885 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9886 return true;
9889 if (speed_p)
9890 *cost += 2 * extra_cost->alu.arith;
9891 return false;
9894 /* Vector mode? */
9895 *cost = LIBCALL_COST (2);
9896 return false;
9897 case IOR:
9898 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9900 *cost = COSTS_N_INSNS (1);
9901 if (speed_p)
9902 *cost += extra_cost->alu.rev;
9904 return true;
9906 /* Fall through. */
9907 case AND: case XOR:
9908 if (mode == SImode)
9910 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9911 rtx op0 = XEXP (x, 0);
9912 rtx shift_op, shift_reg;
9914 *cost = COSTS_N_INSNS (1);
9916 if (subcode == NOT
9917 && (code == AND
9918 || (code == IOR && TARGET_THUMB2)))
9919 op0 = XEXP (op0, 0);
9921 shift_reg = NULL;
9922 shift_op = shifter_op_p (op0, &shift_reg);
9923 if (shift_op != NULL)
9925 if (shift_reg)
9927 if (speed_p)
9928 *cost += extra_cost->alu.log_shift_reg;
9929 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9931 else if (speed_p)
9932 *cost += extra_cost->alu.log_shift;
9934 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9935 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9936 return true;
9939 if (CONST_INT_P (XEXP (x, 1)))
9941 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9942 INTVAL (XEXP (x, 1)), NULL_RTX,
9943 NULL_RTX, 1, 0);
9945 *cost = COSTS_N_INSNS (insns);
9946 if (speed_p)
9947 *cost += insns * extra_cost->alu.logical;
9948 *cost += rtx_cost (op0, code, 0, speed_p);
9949 return true;
9952 if (speed_p)
9953 *cost += extra_cost->alu.logical;
9954 *cost += (rtx_cost (op0, code, 0, speed_p)
9955 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9956 return true;
9959 if (mode == DImode)
9961 rtx op0 = XEXP (x, 0);
9962 enum rtx_code subcode = GET_CODE (op0);
9964 *cost = COSTS_N_INSNS (2);
9966 if (subcode == NOT
9967 && (code == AND
9968 || (code == IOR && TARGET_THUMB2)))
9969 op0 = XEXP (op0, 0);
9971 if (GET_CODE (op0) == ZERO_EXTEND)
9973 if (speed_p)
9974 *cost += 2 * extra_cost->alu.logical;
9976 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9977 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9978 return true;
9980 else if (GET_CODE (op0) == SIGN_EXTEND)
9982 if (speed_p)
9983 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9985 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9986 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9987 return true;
9990 if (speed_p)
9991 *cost += 2 * extra_cost->alu.logical;
9993 return true;
9995 /* Vector mode? */
9997 *cost = LIBCALL_COST (2);
9998 return false;
10000 case MULT:
10001 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10002 && (mode == SFmode || !TARGET_VFP_SINGLE))
10004 rtx op0 = XEXP (x, 0);
10006 *cost = COSTS_N_INSNS (1);
10008 if (GET_CODE (op0) == NEG)
10009 op0 = XEXP (op0, 0);
10011 if (speed_p)
10012 *cost += extra_cost->fp[mode != SFmode].mult;
10014 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10015 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10016 return true;
10018 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10020 *cost = LIBCALL_COST (2);
10021 return false;
10024 if (mode == SImode)
10026 *cost = COSTS_N_INSNS (1);
10027 if (TARGET_DSP_MULTIPLY
10028 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10029 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10030 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10031 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10032 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10033 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10034 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10035 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10036 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10037 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10038 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10039 && (INTVAL (XEXP (XEXP (x, 1), 1))
10040 == 16))))))
10042 /* SMUL[TB][TB]. */
10043 if (speed_p)
10044 *cost += extra_cost->mult[0].extend;
10045 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10046 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10047 return true;
10049 if (speed_p)
10050 *cost += extra_cost->mult[0].simple;
10051 return false;
10054 if (mode == DImode)
10056 if (arm_arch3m
10057 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10058 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10059 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10060 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10062 *cost = COSTS_N_INSNS (1);
10063 if (speed_p)
10064 *cost += extra_cost->mult[1].extend;
10065 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10066 ZERO_EXTEND, 0, speed_p)
10067 + rtx_cost (XEXP (XEXP (x, 1), 0),
10068 ZERO_EXTEND, 0, speed_p));
10069 return true;
10072 *cost = LIBCALL_COST (2);
10073 return false;
10076 /* Vector mode? */
10077 *cost = LIBCALL_COST (2);
10078 return false;
10080 case NEG:
10081 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10082 && (mode == SFmode || !TARGET_VFP_SINGLE))
10084 *cost = COSTS_N_INSNS (1);
10085 if (speed_p)
10086 *cost += extra_cost->fp[mode != SFmode].neg;
10088 return false;
10090 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10092 *cost = LIBCALL_COST (1);
10093 return false;
10096 if (mode == SImode)
10098 if (GET_CODE (XEXP (x, 0)) == ABS)
10100 *cost = COSTS_N_INSNS (2);
10101 /* Assume the non-flag-changing variant. */
10102 if (speed_p)
10103 *cost += (extra_cost->alu.log_shift
10104 + extra_cost->alu.arith_shift);
10105 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10106 return true;
10109 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10110 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10112 *cost = COSTS_N_INSNS (2);
10113 /* No extra cost for MOV imm and MVN imm. */
10114 /* If the comparison op is using the flags, there's no further
10115 cost, otherwise we need to add the cost of the comparison. */
10116 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10117 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10118 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10120 *cost += (COSTS_N_INSNS (1)
10121 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10122 speed_p)
10123 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10124 speed_p));
10125 if (speed_p)
10126 *cost += extra_cost->alu.arith;
10128 return true;
10130 *cost = COSTS_N_INSNS (1);
10131 if (speed_p)
10132 *cost += extra_cost->alu.arith;
10133 return false;
10136 if (GET_MODE_CLASS (mode) == MODE_INT
10137 && GET_MODE_SIZE (mode) < 4)
10139 /* Slightly disparage, as we might need an extend operation. */
10140 *cost = 1 + COSTS_N_INSNS (1);
10141 if (speed_p)
10142 *cost += extra_cost->alu.arith;
10143 return false;
10146 if (mode == DImode)
10148 *cost = COSTS_N_INSNS (2);
10149 if (speed_p)
10150 *cost += 2 * extra_cost->alu.arith;
10151 return false;
10154 /* Vector mode? */
10155 *cost = LIBCALL_COST (1);
10156 return false;
10158 case NOT:
10159 if (mode == SImode)
10161 rtx shift_op;
10162 rtx shift_reg = NULL;
10164 *cost = COSTS_N_INSNS (1);
10165 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10167 if (shift_op)
10169 if (shift_reg != NULL)
10171 if (speed_p)
10172 *cost += extra_cost->alu.log_shift_reg;
10173 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10175 else if (speed_p)
10176 *cost += extra_cost->alu.log_shift;
10177 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10178 return true;
10181 if (speed_p)
10182 *cost += extra_cost->alu.logical;
10183 return false;
10185 if (mode == DImode)
10187 *cost = COSTS_N_INSNS (2);
10188 return false;
10191 /* Vector mode? */
10193 *cost += LIBCALL_COST (1);
10194 return false;
10196 case IF_THEN_ELSE:
10198 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10200 *cost = COSTS_N_INSNS (4);
10201 return true;
10203 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10204 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10206 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10207 /* Assume that if one arm of the if_then_else is a register,
10208 it will be tied with the result and the conditional insn
10209 eliminated. */
10210 if (REG_P (XEXP (x, 1)))
10211 *cost += op2cost;
10212 else if (REG_P (XEXP (x, 2)))
10213 *cost += op1cost;
10214 else
10216 if (speed_p)
10218 if (extra_cost->alu.non_exec_costs_exec)
10219 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10220 else
10221 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10223 else
10224 *cost += op1cost + op2cost;
10227 return true;
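 /* Illustration of the register-tying assumption above (operands are
 hypothetical): for (if_then_else (cond) (reg r0) (expr)), only the cost
 of EXPR is added, since r0 is expected to be tied to the destination and
 its conditional move eliminated. */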
10229 case COMPARE:
10230 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10231 *cost = 0;
10232 else
10234 enum machine_mode op0mode;
10235 /* We'll mostly assume that the cost of a compare is the cost of the
10236 LHS. However, there are some notable exceptions. */
10238 /* Floating point compares are never done as side-effects. */
10239 op0mode = GET_MODE (XEXP (x, 0));
10240 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10241 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10243 *cost = COSTS_N_INSNS (1);
10244 if (speed_p)
10245 *cost += extra_cost->fp[op0mode != SFmode].compare;
10247 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10249 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10250 return true;
10253 return false;
10255 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10257 *cost = LIBCALL_COST (2);
10258 return false;
10261 /* DImode compares normally take two insns. */
10262 if (op0mode == DImode)
10264 *cost = COSTS_N_INSNS (2);
10265 if (speed_p)
10266 *cost += 2 * extra_cost->alu.arith;
10267 return false;
10270 if (op0mode == SImode)
10272 rtx shift_op;
10273 rtx shift_reg;
10275 if (XEXP (x, 1) == const0_rtx
10276 && !(REG_P (XEXP (x, 0))
10277 || (GET_CODE (XEXP (x, 0)) == SUBREG
10278 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10280 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10282 /* Multiply operations that set the flags are often
10283 significantly more expensive. */
10284 if (speed_p
10285 && GET_CODE (XEXP (x, 0)) == MULT
10286 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10287 *cost += extra_cost->mult[0].flag_setting;
10289 if (speed_p
10290 && GET_CODE (XEXP (x, 0)) == PLUS
10291 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10292 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10293 0), 1), mode))
10294 *cost += extra_cost->mult[0].flag_setting;
10295 return true;
10298 shift_reg = NULL;
10299 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10300 if (shift_op != NULL)
10302 *cost = COSTS_N_INSNS (1);
10303 if (shift_reg != NULL)
10305 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10306 if (speed_p)
10307 *cost += extra_cost->alu.arith_shift_reg;
10309 else if (speed_p)
10310 *cost += extra_cost->alu.arith_shift;
10311 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10312 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10313 return true;
10316 *cost = COSTS_N_INSNS (1);
10317 if (speed_p)
10318 *cost += extra_cost->alu.arith;
10319 if (CONST_INT_P (XEXP (x, 1))
10320 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10322 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10323 return true;
10325 return false;
10328 /* Vector mode? */
10330 *cost = LIBCALL_COST (2);
10331 return false;
10333 return true;
10335 case EQ:
10336 case NE:
10337 case LT:
10338 case LE:
10339 case GT:
10340 case GE:
10341 case LTU:
10342 case LEU:
10343 case GEU:
10344 case GTU:
10345 case ORDERED:
10346 case UNORDERED:
10347 case UNEQ:
10348 case UNLE:
10349 case UNLT:
10350 case UNGE:
10351 case UNGT:
10352 case LTGT:
10353 if (outer_code == SET)
10355 /* Is it a store-flag operation? */
10356 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10357 && XEXP (x, 1) == const0_rtx)
10359 /* Thumb also needs an IT insn. */
10360 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10361 return true;
10363 if (XEXP (x, 1) == const0_rtx)
10365 switch (code)
10367 case LT:
10368 /* LSR Rd, Rn, #31. */
10369 *cost = COSTS_N_INSNS (1);
10370 if (speed_p)
10371 *cost += extra_cost->alu.shift;
10372 break;
10374 case EQ:
10375 /* RSBS T1, Rn, #0
10376 ADC Rd, Rn, T1. */
10378 case NE:
10379 /* SUBS T1, Rn, #1
10380 SBC Rd, Rn, T1. */
10381 *cost = COSTS_N_INSNS (2);
10382 break;
10384 case LE:
10385 /* RSBS T1, Rn, Rn, LSR #31
10386 ADC Rd, Rn, T1. */
10387 *cost = COSTS_N_INSNS (2);
10388 if (speed_p)
10389 *cost += extra_cost->alu.arith_shift;
10390 break;
10392 case GT:
10393 /* RSB Rd, Rn, Rn, ASR #1
10394 LSR Rd, Rd, #31. */
10395 *cost = COSTS_N_INSNS (2);
10396 if (speed_p)
10397 *cost += (extra_cost->alu.arith_shift
10398 + extra_cost->alu.shift);
10399 break;
10401 case GE:
10402 /* ASR Rd, Rn, #31
10403 ADD Rd, Rd, #1. */
10404 *cost = COSTS_N_INSNS (2);
10405 if (speed_p)
10406 *cost += extra_cost->alu.shift;
10407 break;
10409 default:
10410 /* Remaining cases are either meaningless or would take
10411 three insns anyway. */
10412 *cost = COSTS_N_INSNS (3);
10413 break;
10415 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10416 return true;
10418 else
10420 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10421 if (CONST_INT_P (XEXP (x, 1))
10422 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10424 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10425 return true;
10428 return false;
10431 /* Not directly inside a set. If it involves the condition code
10432 register it must be the condition for a branch, cond_exec or
10433 I_T_E operation. Since the comparison is performed elsewhere
10434 this is just the control part which has no additional
10435 cost. */
10436 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10437 && XEXP (x, 1) == const0_rtx)
10439 *cost = 0;
10440 return true;
10442 return false;
10444 case ABS:
10445 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10446 && (mode == SFmode || !TARGET_VFP_SINGLE))
10448 *cost = COSTS_N_INSNS (1);
10449 if (speed_p)
10450 *cost += extra_cost->fp[mode != SFmode].neg;
10452 return false;
10454 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10456 *cost = LIBCALL_COST (1);
10457 return false;
10460 if (mode == SImode)
10462 *cost = COSTS_N_INSNS (1);
10463 if (speed_p)
10464 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10465 return false;
10467 /* Vector mode? */
10468 *cost = LIBCALL_COST (1);
10469 return false;
10471 case SIGN_EXTEND:
10472 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10473 && MEM_P (XEXP (x, 0)))
10475 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10477 if (mode == DImode)
10478 *cost += COSTS_N_INSNS (1);
10480 if (!speed_p)
10481 return true;
10483 if (GET_MODE (XEXP (x, 0)) == SImode)
10484 *cost += extra_cost->ldst.load;
10485 else
10486 *cost += extra_cost->ldst.load_sign_extend;
10488 if (mode == DImode)
10489 *cost += extra_cost->alu.shift;
10491 return true;
10494 /* Widening from less than 32 bits requires an extend operation. */
10495 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10497 /* We have SXTB/SXTH. */
10498 *cost = COSTS_N_INSNS (1);
10499 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10500 if (speed_p)
10501 *cost += extra_cost->alu.extend;
10503 else if (GET_MODE (XEXP (x, 0)) != SImode)
10505 /* Needs two shifts. */
10506 *cost = COSTS_N_INSNS (2);
10507 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10508 if (speed_p)
10509 *cost += 2 * extra_cost->alu.shift;
10512 /* Widening beyond 32 bits requires one more insn. */
10513 if (mode == DImode)
10515 *cost += COSTS_N_INSNS (1);
10516 if (speed_p)
10517 *cost += extra_cost->alu.shift;
10520 return true;
10522 case ZERO_EXTEND:
10523 if ((arm_arch4
10524 || GET_MODE (XEXP (x, 0)) == SImode
10525 || GET_MODE (XEXP (x, 0)) == QImode)
10526 && MEM_P (XEXP (x, 0)))
10528 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10530 if (mode == DImode)
10531 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10533 return true;
10536 /* Widening from less than 32 bits requires an extend operation. */
10537 if (GET_MODE (XEXP (x, 0)) == QImode)
10539 /* UXTB can be a shorter instruction in Thumb2, but it might
10540 be slower than the AND Rd, Rn, #255 alternative. When
10541 optimizing for speed it should never be slower to use
10542 AND, and we don't really model 16-bit vs 32-bit insns
10543 here. */
10544 *cost = COSTS_N_INSNS (1);
10545 if (speed_p)
10546 *cost += extra_cost->alu.logical;
10548 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10550 /* We have UXTB/UXTH. */
10551 *cost = COSTS_N_INSNS (1);
10552 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10553 if (speed_p)
10554 *cost += extra_cost->alu.extend;
10556 else if (GET_MODE (XEXP (x, 0)) != SImode)
10558 /* Needs two shifts. It's marginally preferable to use
10559 shifts rather than two BIC instructions as the second
10560 shift may merge with a subsequent insn as a shifter
10561 op. */
10562 *cost = COSTS_N_INSNS (2);
10563 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10564 if (speed_p)
10565 *cost += 2 * extra_cost->alu.shift;
10567 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10568 *cost = COSTS_N_INSNS (1);
10570 /* Widening beyond 32 bits requires one more insn. */
10571 if (mode == DImode)
10573 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10576 return true;
10578 case CONST_INT:
10579 *cost = 0;
10580 /* CONST_INT has no mode, so we cannot tell for sure how many
10581 insns are really going to be needed. The best we can do is
10582 look at the value passed. If it fits in SImode, then assume
10583 that's the mode it will be used for. Otherwise assume it
10584 will be used in DImode. */
10585 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10586 mode = SImode;
10587 else
10588 mode = DImode;
10590 /* Avoid blowing up in arm_gen_constant (). */
10591 if (!(outer_code == PLUS
10592 || outer_code == AND
10593 || outer_code == IOR
10594 || outer_code == XOR
10595 || outer_code == MINUS))
10596 outer_code = SET;
10598 const_int_cost:
10599 if (mode == SImode)
10601 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10602 INTVAL (x), NULL, NULL,
10603 0, 0));
10604 /* Extra costs? */
10606 else
10608 *cost += COSTS_N_INSNS (arm_gen_constant
10609 (outer_code, SImode, NULL,
10610 trunc_int_for_mode (INTVAL (x), SImode),
10611 NULL, NULL, 0, 0)
10612 + arm_gen_constant (outer_code, SImode, NULL,
10613 INTVAL (x) >> 32, NULL,
10614 NULL, 0, 0));
10615 /* Extra costs? */
10618 return true;
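 /* Illustration (hypothetical value): 0x100000001 does not survive
 trunc_int_for_mode (..., SImode), so it is treated as DImode and costed
 as two SImode constants -- the low word 0x1 and the high word
 (INTVAL (x) >> 32) == 0x1 -- each synthesized via arm_gen_constant. */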
10620 case CONST:
10621 case LABEL_REF:
10622 case SYMBOL_REF:
10623 if (speed_p)
10625 if (arm_arch_thumb2 && !flag_pic)
10626 *cost = COSTS_N_INSNS (2);
10627 else
10628 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10630 else
10631 *cost = COSTS_N_INSNS (2);
10633 if (flag_pic)
10635 *cost += COSTS_N_INSNS (1);
10636 if (speed_p)
10637 *cost += extra_cost->alu.arith;
10640 return true;
10642 case CONST_FIXED:
10643 *cost = COSTS_N_INSNS (4);
10644 /* Fixme. */
10645 return true;
10647 case CONST_DOUBLE:
10648 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10649 && (mode == SFmode || !TARGET_VFP_SINGLE))
10651 if (vfp3_const_double_rtx (x))
10653 *cost = COSTS_N_INSNS (1);
10654 if (speed_p)
10655 *cost += extra_cost->fp[mode == DFmode].fpconst;
10656 return true;
10659 if (speed_p)
10661 *cost = COSTS_N_INSNS (1);
10662 if (mode == DFmode)
10663 *cost += extra_cost->ldst.loadd;
10664 else
10665 *cost += extra_cost->ldst.loadf;
10667 else
10668 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10670 return true;
10672 *cost = COSTS_N_INSNS (4);
10673 return true;
10675 case CONST_VECTOR:
10676 /* Fixme. */
10677 if (TARGET_NEON
10678 && TARGET_HARD_FLOAT
10679 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10680 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10681 *cost = COSTS_N_INSNS (1);
10682 else
10683 *cost = COSTS_N_INSNS (4);
10684 return true;
10686 case HIGH:
10687 case LO_SUM:
10688 *cost = COSTS_N_INSNS (1);
10689 /* When optimizing for size, we prefer constant pool entries to
10690 MOVW/MOVT pairs, so bump the cost of these slightly. */
10691 if (!speed_p)
10692 *cost += 1;
10693 return true;
10695 case CLZ:
10696 *cost = COSTS_N_INSNS (1);
10697 if (speed_p)
10698 *cost += extra_cost->alu.clz;
10699 return false;
10701 case SMIN:
10702 if (XEXP (x, 1) == const0_rtx)
10704 *cost = COSTS_N_INSNS (1);
10705 if (speed_p)
10706 *cost += extra_cost->alu.log_shift;
10707 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10708 return true;
10710 /* Fall through. */
10711 case SMAX:
10712 case UMIN:
10713 case UMAX:
10714 *cost = COSTS_N_INSNS (2);
10715 return false;
10717 case TRUNCATE:
10718 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10719 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10720 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10721 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10722 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10723 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10724 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10725 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10726 == ZERO_EXTEND))))
10728 *cost = COSTS_N_INSNS (1);
10729 if (speed_p)
10730 *cost += extra_cost->mult[1].extend;
10731 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10732 speed_p)
10733 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10734 0, speed_p));
10735 return true;
10737 *cost = LIBCALL_COST (1);
10738 return false;
10740 case UNSPEC:
10741 return arm_unspec_cost (x, outer_code, speed_p, cost);
10743 case PC:
10744 /* Reading the PC is like reading any other register. Writing it
10745 is more expensive, but we take that into account elsewhere. */
10746 *cost = 0;
10747 return true;
10749 case ZERO_EXTRACT:
10750 /* TODO: Simple zero_extract of bottom bits using AND. */
10751 /* Fall through. */
10752 case SIGN_EXTRACT:
10753 if (arm_arch6
10754 && mode == SImode
10755 && CONST_INT_P (XEXP (x, 1))
10756 && CONST_INT_P (XEXP (x, 2)))
10758 *cost = COSTS_N_INSNS (1);
10759 if (speed_p)
10760 *cost += extra_cost->alu.bfx;
10761 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10762 return true;
10764 /* Without UBFX/SBFX, need to resort to shift operations. */
10765 *cost = COSTS_N_INSNS (2);
10766 if (speed_p)
10767 *cost += 2 * extra_cost->alu.shift;
10768 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10769 return true;
10771 case FLOAT_EXTEND:
10772 if (TARGET_HARD_FLOAT)
10774 *cost = COSTS_N_INSNS (1);
10775 if (speed_p)
10776 *cost += extra_cost->fp[mode == DFmode].widen;
10777 if (!TARGET_FPU_ARMV8
10778 && GET_MODE (XEXP (x, 0)) == HFmode)
10780 /* Pre v8, widening HF->DF is a two-step process, first
10781 widening to SFmode. */
10782 *cost += COSTS_N_INSNS (1);
10783 if (speed_p)
10784 *cost += extra_cost->fp[0].widen;
10786 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10787 return true;
10790 *cost = LIBCALL_COST (1);
10791 return false;
10793 case FLOAT_TRUNCATE:
10794 if (TARGET_HARD_FLOAT)
10796 *cost = COSTS_N_INSNS (1);
10797 if (speed_p)
10798 *cost += extra_cost->fp[mode == DFmode].narrow;
10799 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10800 return true;
10801 /* Vector modes? */
10803 *cost = LIBCALL_COST (1);
10804 return false;
10806 case FMA:
10807 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10809 rtx op0 = XEXP (x, 0);
10810 rtx op1 = XEXP (x, 1);
10811 rtx op2 = XEXP (x, 2);
10813 *cost = COSTS_N_INSNS (1);
10815 /* vfms or vfnma. */
10816 if (GET_CODE (op0) == NEG)
10817 op0 = XEXP (op0, 0);
10819 /* vfnms or vfnma. */
10820 if (GET_CODE (op2) == NEG)
10821 op2 = XEXP (op2, 0);
10823 *cost += rtx_cost (op0, FMA, 0, speed_p);
10824 *cost += rtx_cost (op1, FMA, 1, speed_p);
10825 *cost += rtx_cost (op2, FMA, 2, speed_p);
10827 if (speed_p)
10828 *cost += extra_cost->fp[mode == DFmode].fma;
10830 return true;
10833 *cost = LIBCALL_COST (3);
10834 return false;
10836 case FIX:
10837 case UNSIGNED_FIX:
10838 if (TARGET_HARD_FLOAT)
10840 if (GET_MODE_CLASS (mode) == MODE_INT)
10842 *cost = COSTS_N_INSNS (1);
10843 if (speed_p)
10844 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10845 /* Strip off the 'cost' of rounding towards zero. */
10846 if (GET_CODE (XEXP (x, 0)) == FIX)
10847 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10848 else
10849 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10850 /* ??? Increase the cost to deal with transferring from
10851 FP -> CORE registers? */
10852 return true;
10854 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10855 && TARGET_FPU_ARMV8)
10857 *cost = COSTS_N_INSNS (1);
10858 if (speed_p)
10859 *cost += extra_cost->fp[mode == DFmode].roundint;
10860 return false;
10862 /* Vector costs? */
10864 *cost = LIBCALL_COST (1);
10865 return false;
10867 case FLOAT:
10868 case UNSIGNED_FLOAT:
10869 if (TARGET_HARD_FLOAT)
10871 /* ??? Increase the cost to deal with transferring from CORE
10872 -> FP registers? */
10873 *cost = COSTS_N_INSNS (1);
10874 if (speed_p)
10875 *cost += extra_cost->fp[mode == DFmode].fromint;
10876 return false;
10878 *cost = LIBCALL_COST (1);
10879 return false;
10881 case CALL:
10882 *cost = COSTS_N_INSNS (1);
10883 return true;
10885 case ASM_OPERANDS:
10887 /* Just a guess. Guess number of instructions in the asm
10888 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10889 though (see PR60663). */
10890 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10891 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10893 *cost = COSTS_N_INSNS (asm_length + num_operands);
10894 return true;
10896 default:
10897 if (mode != VOIDmode)
10898 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10899 else
10900 *cost = COSTS_N_INSNS (4); /* Who knows? */
10901 return false;
10905 #undef HANDLE_NARROW_SHIFT_ARITH
10907 /* RTX costs. Dispatch to the appropriate per-tuning, generic, or size cost routines. */
10908 static bool
10909 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10910 int *total, bool speed)
10912 bool result;
10914 if (TARGET_OLD_RTX_COSTS
10915 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10917 /* Old way. (Deprecated.) */
10918 if (!speed)
10919 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10920 (enum rtx_code) outer_code, total);
10921 else
10922 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10923 (enum rtx_code) outer_code, total,
10924 speed);
10926 else
10928 /* New way. */
10929 if (current_tune->insn_extra_cost)
10930 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10931 (enum rtx_code) outer_code,
10932 current_tune->insn_extra_cost,
10933 total, speed);
10934 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10935 && current_tune->insn_extra_cost == NULL */
10936 else
10937 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10938 (enum rtx_code) outer_code,
10939 &generic_extra_costs, total, speed);
10942 if (dump_file && (dump_flags & TDF_DETAILS))
10944 print_rtl_single (dump_file, x);
10945 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10946 *total, result ? "final" : "partial");
10948 return result;
10951 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10952 supported on any "slowmul" cores, so it can be ignored. */
10954 static bool
10955 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10956 int *total, bool speed)
10958 enum machine_mode mode = GET_MODE (x);
10960 if (TARGET_THUMB)
10962 *total = thumb1_rtx_costs (x, code, outer_code);
10963 return true;
10966 switch (code)
10968 case MULT:
10969 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10970 || mode == DImode)
10972 *total = COSTS_N_INSNS (20);
10973 return false;
10976 if (CONST_INT_P (XEXP (x, 1)))
10978 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10979 & (unsigned HOST_WIDE_INT) 0xffffffff);
10980 int cost, const_ok = const_ok_for_arm (i);
10981 int j, booth_unit_size;
10983 /* Tune as appropriate. */
10984 cost = const_ok ? 4 : 8;
10985 booth_unit_size = 2;
10986 for (j = 0; i && j < 32; j += booth_unit_size)
10988 i >>= booth_unit_size;
10989 cost++;
10992 *total = COSTS_N_INSNS (cost);
10993 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
10994 return true;
10997 *total = COSTS_N_INSNS (20);
10998 return false;
11000 default:
11001 return arm_rtx_costs_1 (x, outer_code, total, speed);
11006 /* RTX cost for cores with a fast multiply unit (M variants). */
11008 static bool
11009 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11010 int *total, bool speed)
11012 enum machine_mode mode = GET_MODE (x);
11014 if (TARGET_THUMB1)
11016 *total = thumb1_rtx_costs (x, code, outer_code);
11017 return true;
11020 /* ??? should thumb2 use different costs? */
11021 switch (code)
11023 case MULT:
11024 /* There is no point basing this on the tuning, since it is always the
11025 fast variant if it exists at all. */
11026 if (mode == DImode
11027 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11028 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11029 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11031 *total = COSTS_N_INSNS (2);
11032 return false;
11036 if (mode == DImode)
11038 *total = COSTS_N_INSNS (5);
11039 return false;
11042 if (CONST_INT_P (XEXP (x, 1)))
11044 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11045 & (unsigned HOST_WIDE_INT) 0xffffffff);
11046 int cost, const_ok = const_ok_for_arm (i);
11047 int j, booth_unit_size;
11049 /* Tune as appropriate. */
11050 cost = const_ok ? 4 : 8;
11051 booth_unit_size = 8;
11052 for (j = 0; i && j < 32; j += booth_unit_size)
11054 i >>= booth_unit_size;
11055 cost++;
11058 *total = COSTS_N_INSNS (cost);
11059 return false;
11062 if (mode == SImode)
11064 *total = COSTS_N_INSNS (4);
11065 return false;
11068 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11070 if (TARGET_HARD_FLOAT
11071 && (mode == SFmode
11072 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11074 *total = COSTS_N_INSNS (1);
11075 return false;
11079 /* Requires a lib call */
11080 *total = COSTS_N_INSNS (20);
11081 return false;
11083 default:
11084 return arm_rtx_costs_1 (x, outer_code, total, speed);
11089 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11090 so it can be ignored. */
11092 static bool
11093 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11094 int *total, bool speed)
11096 enum machine_mode mode = GET_MODE (x);
11098 if (TARGET_THUMB)
11100 *total = thumb1_rtx_costs (x, code, outer_code);
11101 return true;
11104 switch (code)
11106 case COMPARE:
11107 if (GET_CODE (XEXP (x, 0)) != MULT)
11108 return arm_rtx_costs_1 (x, outer_code, total, speed);
11110 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11111 will stall until the multiplication is complete. */
11112 *total = COSTS_N_INSNS (3);
11113 return false;
11115 case MULT:
11116 /* There is no point basing this on the tuning, since it is always the
11117 fast variant if it exists at all. */
11118 if (mode == DImode
11119 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11120 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11121 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11123 *total = COSTS_N_INSNS (2);
11124 return false;
11128 if (mode == DImode)
11130 *total = COSTS_N_INSNS (5);
11131 return false;
11134 if (CONST_INT_P (XEXP (x, 1)))
11136 /* If operand 1 is a constant we can more accurately
11137 calculate the cost of the multiply. The multiplier can
11138 retire 15 bits on the first cycle and a further 12 on the
11139 second. We do, of course, have to load the constant into
11140 a register first. */
11141 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11142 /* There's a general overhead of one cycle. */
11143 int cost = 1;
11144 unsigned HOST_WIDE_INT masked_const;
11146 if (i & 0x80000000)
11147 i = ~i;
11149 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11151 masked_const = i & 0xffff8000;
11152 if (masked_const != 0)
11154 cost++;
11155 masked_const = i & 0xf8000000;
11156 if (masked_const != 0)
11157 cost++;
11159 *total = COSTS_N_INSNS (cost);
11160 return false;
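 /* Illustration of the constant-operand case above (hypothetical values):
 i == 0xff fits entirely below bit 15, so cost stays at 1; i == 0x12340000
 has bits set in both the 0xffff8000 and the 0xf8000000 masks, so cost
 becomes 3. */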
11163 if (mode == SImode)
11165 *total = COSTS_N_INSNS (3);
11166 return false;
11169 /* Requires a lib call */
11170 *total = COSTS_N_INSNS (20);
11171 return false;
11173 default:
11174 return arm_rtx_costs_1 (x, outer_code, total, speed);
11179 /* RTX costs for 9e (and later) cores. */
11181 static bool
11182 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11183 int *total, bool speed)
11185 enum machine_mode mode = GET_MODE (x);
11187 if (TARGET_THUMB1)
11189 switch (code)
11191 case MULT:
11192 *total = COSTS_N_INSNS (3);
11193 return true;
11195 default:
11196 *total = thumb1_rtx_costs (x, code, outer_code);
11197 return true;
11201 switch (code)
11203 case MULT:
11204 /* There is no point basing this on the tuning, since it is always the
11205 fast variant if it exists at all. */
11206 if (mode == DImode
11207 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11208 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11209 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11211 *total = COSTS_N_INSNS (2);
11212 return false;
11216 if (mode == DImode)
11218 *total = COSTS_N_INSNS (5);
11219 return false;
11222 if (mode == SImode)
11224 *total = COSTS_N_INSNS (2);
11225 return false;
11228 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11230 if (TARGET_HARD_FLOAT
11231 && (mode == SFmode
11232 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11234 *total = COSTS_N_INSNS (1);
11235 return false;
11239 *total = COSTS_N_INSNS (20);
11240 return false;
11242 default:
11243 return arm_rtx_costs_1 (x, outer_code, total, speed);
11246 /* All address computations that can be done are free, but rtx cost returns
11247 the same for practically all of them. So we weight the different types
11248 of address here in the order (most pref first):
11249 PRE/POST_INC/DEC, INT sum, SHIFT or NON-INT sum, REG, MEM or LABEL. */
11250 static inline int
11251 arm_arm_address_cost (rtx x)
11253 enum rtx_code c = GET_CODE (x);
11255 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11256 return 0;
11257 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11258 return 10;
11260 if (c == PLUS)
11262 if (CONST_INT_P (XEXP (x, 1)))
11263 return 2;
11265 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11266 return 3;
11268 return 4;
11271 return 6;
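 /* Illustrative weights returned above (addresses are hypothetical):
 a POST_INC such as [r3], #4 -> 0; [r2, #8] -> 2; [r2, r1, lsl #2] -> 3;
 [r2, r1] -> 4; a bare REG -> 6; a SYMBOL_REF or LABEL_REF -> 10. */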
11274 static inline int
11275 arm_thumb_address_cost (rtx x)
11277 enum rtx_code c = GET_CODE (x);
11279 if (c == REG)
11280 return 1;
11281 if (c == PLUS
11282 && REG_P (XEXP (x, 0))
11283 && CONST_INT_P (XEXP (x, 1)))
11284 return 1;
11286 return 2;
11289 static int
11290 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11291 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11293 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11296 /* Adjust cost hook for XScale. */
11297 static bool
11298 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11300 /* Some true dependencies can have a higher cost depending
11301 on precisely how certain input operands are used. */
11302 if (REG_NOTE_KIND(link) == 0
11303 && recog_memoized (insn) >= 0
11304 && recog_memoized (dep) >= 0)
11306 int shift_opnum = get_attr_shift (insn);
11307 enum attr_type attr_type = get_attr_type (dep);
11309 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11310 operand for INSN. If we have a shifted input operand and the
11311 instruction we depend on is another ALU instruction, then we may
11312 have to account for an additional stall. */
11313 if (shift_opnum != 0
11314 && (attr_type == TYPE_ALU_SHIFT_IMM
11315 || attr_type == TYPE_ALUS_SHIFT_IMM
11316 || attr_type == TYPE_LOGIC_SHIFT_IMM
11317 || attr_type == TYPE_LOGICS_SHIFT_IMM
11318 || attr_type == TYPE_ALU_SHIFT_REG
11319 || attr_type == TYPE_ALUS_SHIFT_REG
11320 || attr_type == TYPE_LOGIC_SHIFT_REG
11321 || attr_type == TYPE_LOGICS_SHIFT_REG
11322 || attr_type == TYPE_MOV_SHIFT
11323 || attr_type == TYPE_MVN_SHIFT
11324 || attr_type == TYPE_MOV_SHIFT_REG
11325 || attr_type == TYPE_MVN_SHIFT_REG))
11327 rtx shifted_operand;
11328 int opno;
11330 /* Get the shifted operand. */
11331 extract_insn (insn);
11332 shifted_operand = recog_data.operand[shift_opnum];
11334 /* Iterate over all the operands in DEP. If we write an operand
11335 that overlaps with SHIFTED_OPERAND, then we have to increase the
11336 cost of this dependency. */
11337 extract_insn (dep);
11338 preprocess_constraints ();
11339 for (opno = 0; opno < recog_data.n_operands; opno++)
11341 /* We can ignore strict inputs. */
11342 if (recog_data.operand_type[opno] == OP_IN)
11343 continue;
11345 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11346 shifted_operand))
11348 *cost = 2;
11349 return false;
11354 return true;
11357 /* Adjust cost hook for Cortex A9. */
11358 static bool
11359 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11361 switch (REG_NOTE_KIND (link))
11363 case REG_DEP_ANTI:
11364 *cost = 0;
11365 return false;
11367 case REG_DEP_TRUE:
11368 case REG_DEP_OUTPUT:
11369 if (recog_memoized (insn) >= 0
11370 && recog_memoized (dep) >= 0)
11372 if (GET_CODE (PATTERN (insn)) == SET)
11374 if (GET_MODE_CLASS
11375 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11376 || GET_MODE_CLASS
11377 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11379 enum attr_type attr_type_insn = get_attr_type (insn);
11380 enum attr_type attr_type_dep = get_attr_type (dep);
11382 /* By default all dependencies of the form
11383 s0 = s0 <op> s1
11384 s0 = s0 <op> s2
11385 have an extra latency of 1 cycle because
11386 of the input and output dependency in this
11387 case. However, this gets modeled as a true
11388 dependency, hence all these checks. */
11389 if (REG_P (SET_DEST (PATTERN (insn)))
11390 && REG_P (SET_DEST (PATTERN (dep)))
11391 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11392 SET_DEST (PATTERN (dep))))
11394 /* FMACS is a special case where the dependent
11395 instruction can be issued 3 cycles before
11396 the normal latency in case of an output
11397 dependency. */
11398 if ((attr_type_insn == TYPE_FMACS
11399 || attr_type_insn == TYPE_FMACD)
11400 && (attr_type_dep == TYPE_FMACS
11401 || attr_type_dep == TYPE_FMACD))
11403 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11404 *cost = insn_default_latency (dep) - 3;
11405 else
11406 *cost = insn_default_latency (dep);
11407 return false;
11409 else
11411 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11412 *cost = insn_default_latency (dep) + 1;
11413 else
11414 *cost = insn_default_latency (dep);
11416 return false;
11421 break;
11423 default:
11424 gcc_unreachable ();
11427 return true;
11430 /* Adjust cost hook for FA726TE. */
11431 static bool
11432 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11434 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11435 has a penalty of 3. */
11436 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11437 && recog_memoized (insn) >= 0
11438 && recog_memoized (dep) >= 0
11439 && get_attr_conds (dep) == CONDS_SET)
11441 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11442 if (get_attr_conds (insn) == CONDS_USE
11443 && get_attr_type (insn) != TYPE_BRANCH)
11445 *cost = 3;
11446 return false;
11449 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11450 || get_attr_conds (insn) == CONDS_USE)
11452 *cost = 0;
11453 return false;
11457 return true;
11460 /* Implement TARGET_REGISTER_MOVE_COST.
11462 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11463 such a move is typically more expensive than a single memory access. We set
11464 the cost to less than two memory accesses so that floating
11465 point to integer conversion does not go through memory. */
11468 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11469 reg_class_t from, reg_class_t to)
11471 if (TARGET_32BIT)
11473 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11474 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11475 return 15;
11476 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11477 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11478 return 4;
11479 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11480 return 20;
11481 else
11482 return 2;
11484 else
11486 if (from == HI_REGS || to == HI_REGS)
11487 return 4;
11488 else
11489 return 2;
11493 /* Implement TARGET_MEMORY_MOVE_COST. */
11496 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11497 bool in ATTRIBUTE_UNUSED)
11499 if (TARGET_32BIT)
11500 return 10;
11501 else
11503 if (GET_MODE_SIZE (mode) < 4)
11504 return 8;
11505 else
11506 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
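 /* Taken together with arm_register_move_cost above: for TARGET_32BIT a
 memory access is costed at 10, so the VFP <-> core register move cost of
 15 is indeed below the cost of two memory accesses (20), as intended. */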
11510 /* Vectorizer cost model implementation. */
11512 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11513 static int
11514 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11515 tree vectype,
11516 int misalign ATTRIBUTE_UNUSED)
11518 unsigned elements;
11520 switch (type_of_cost)
11522 case scalar_stmt:
11523 return current_tune->vec_costs->scalar_stmt_cost;
11525 case scalar_load:
11526 return current_tune->vec_costs->scalar_load_cost;
11528 case scalar_store:
11529 return current_tune->vec_costs->scalar_store_cost;
11531 case vector_stmt:
11532 return current_tune->vec_costs->vec_stmt_cost;
11534 case vector_load:
11535 return current_tune->vec_costs->vec_align_load_cost;
11537 case vector_store:
11538 return current_tune->vec_costs->vec_store_cost;
11540 case vec_to_scalar:
11541 return current_tune->vec_costs->vec_to_scalar_cost;
11543 case scalar_to_vec:
11544 return current_tune->vec_costs->scalar_to_vec_cost;
11546 case unaligned_load:
11547 return current_tune->vec_costs->vec_unalign_load_cost;
11549 case unaligned_store:
11550 return current_tune->vec_costs->vec_unalign_store_cost;
11552 case cond_branch_taken:
11553 return current_tune->vec_costs->cond_taken_branch_cost;
11555 case cond_branch_not_taken:
11556 return current_tune->vec_costs->cond_not_taken_branch_cost;
11558 case vec_perm:
11559 case vec_promote_demote:
11560 return current_tune->vec_costs->vec_stmt_cost;
11562 case vec_construct:
11563 elements = TYPE_VECTOR_SUBPARTS (vectype);
11564 return elements / 2 + 1;
11566 default:
11567 gcc_unreachable ();
11571 /* Implement targetm.vectorize.add_stmt_cost. */
11573 static unsigned
11574 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11575 struct _stmt_vec_info *stmt_info, int misalign,
11576 enum vect_cost_model_location where)
11578 unsigned *cost = (unsigned *) data;
11579 unsigned retval = 0;
11581 if (flag_vect_cost_model)
11583 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11584 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11586 /* Statements in an inner loop relative to the loop being
11587 vectorized are weighted more heavily. The value here is
11588 arbitrary and could potentially be improved with analysis. */
11589 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11590 count *= 50; /* FIXME. */
11592 retval = (unsigned) (count * stmt_cost);
11593 cost[where] += retval;
11596 return retval;
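 /* Illustration (hypothetical numbers): a statement with stmt_cost 1 that
 occurs count == 2 times in the body of a loop nested inside the loop being
 vectorized is recorded as 2 * 50 * 1 == 100 in cost[vect_body]. */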
11599 /* Return true if and only if this insn can dual-issue only as older. */
11600 static bool
11601 cortexa7_older_only (rtx insn)
11603 if (recog_memoized (insn) < 0)
11604 return false;
11606 switch (get_attr_type (insn))
11608 case TYPE_ALU_REG:
11609 case TYPE_ALUS_REG:
11610 case TYPE_LOGIC_REG:
11611 case TYPE_LOGICS_REG:
11612 case TYPE_ADC_REG:
11613 case TYPE_ADCS_REG:
11614 case TYPE_ADR:
11615 case TYPE_BFM:
11616 case TYPE_REV:
11617 case TYPE_MVN_REG:
11618 case TYPE_SHIFT_IMM:
11619 case TYPE_SHIFT_REG:
11620 case TYPE_LOAD_BYTE:
11621 case TYPE_LOAD1:
11622 case TYPE_STORE1:
11623 case TYPE_FFARITHS:
11624 case TYPE_FADDS:
11625 case TYPE_FFARITHD:
11626 case TYPE_FADDD:
11627 case TYPE_FMOV:
11628 case TYPE_F_CVT:
11629 case TYPE_FCMPS:
11630 case TYPE_FCMPD:
11631 case TYPE_FCONSTS:
11632 case TYPE_FCONSTD:
11633 case TYPE_FMULS:
11634 case TYPE_FMACS:
11635 case TYPE_FMULD:
11636 case TYPE_FMACD:
11637 case TYPE_FDIVS:
11638 case TYPE_FDIVD:
11639 case TYPE_F_MRC:
11640 case TYPE_F_MRRC:
11641 case TYPE_F_FLAG:
11642 case TYPE_F_LOADS:
11643 case TYPE_F_STORES:
11644 return true;
11645 default:
11646 return false;
11650 /* Return true if and only if this insn can dual-issue as younger. */
11651 static bool
11652 cortexa7_younger (FILE *file, int verbose, rtx insn)
11654 if (recog_memoized (insn) < 0)
11656 if (verbose > 5)
11657 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11658 return false;
11661 switch (get_attr_type (insn))
11663 case TYPE_ALU_IMM:
11664 case TYPE_ALUS_IMM:
11665 case TYPE_LOGIC_IMM:
11666 case TYPE_LOGICS_IMM:
11667 case TYPE_EXTEND:
11668 case TYPE_MVN_IMM:
11669 case TYPE_MOV_IMM:
11670 case TYPE_MOV_REG:
11671 case TYPE_MOV_SHIFT:
11672 case TYPE_MOV_SHIFT_REG:
11673 case TYPE_BRANCH:
11674 case TYPE_CALL:
11675 return true;
11676 default:
11677 return false;
11682 /* Look for an instruction that can dual issue only as an older
11683 instruction, and move it in front of any instructions that can
11684 dual-issue as younger, while preserving the relative order of all
11685 other instructions in the ready list. This is a heuristic to help
11686 dual-issue in later cycles, by postponing issue of more flexible
11687 instructions. This heuristic may affect dual issue opportunities
11688 in the current cycle. */
11689 static void
11690 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11691 int clock)
11693 int i;
11694 int first_older_only = -1, first_younger = -1;
11696 if (verbose > 5)
11697 fprintf (file,
11698 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11699 clock,
11700 *n_readyp);
11702 /* Traverse the ready list from the head (the instruction to issue
11703 first), looking for the first instruction that can issue as
11704 younger and the first instruction that can dual-issue only as
11705 older. */
11706 for (i = *n_readyp - 1; i >= 0; i--)
11708 rtx insn = ready[i];
11709 if (cortexa7_older_only (insn))
11711 first_older_only = i;
11712 if (verbose > 5)
11713 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11714 break;
11716 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11717 first_younger = i;
11720 /* Nothing to reorder because either no younger insn found or insn
11721 that can dual-issue only as older appears before any insn that
11722 can dual-issue as younger. */
11723 if (first_younger == -1)
11725 if (verbose > 5)
11726 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11727 return;
11730 /* Nothing to reorder because no older-only insn in the ready list. */
11731 if (first_older_only == -1)
11733 if (verbose > 5)
11734 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11735 return;
11738 /* Move first_older_only insn before first_younger. */
11739 if (verbose > 5)
11740 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11741 INSN_UID(ready [first_older_only]),
11742 INSN_UID(ready [first_younger]));
11743 rtx first_older_only_insn = ready [first_older_only];
11744 for (i = first_older_only; i < first_younger; i++)
11746 ready[i] = ready[i+1];
11749 ready[i] = first_older_only_insn;
11750 return;
11753 /* Implement TARGET_SCHED_REORDER. */
11754 static int
11755 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11756 int clock)
11758 switch (arm_tune)
11760 case cortexa7:
11761 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11762 break;
11763 default:
11764 /* Do nothing for other cores. */
11765 break;
11768 return arm_issue_rate ();
11771 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11772 It corrects the value of COST based on the relationship between
11773 INSN and DEP through the dependence LINK. It returns the new
11774 value. There is a per-core adjust_cost hook to adjust scheduler costs
11775 and the per-core hook can choose to completely override the generic
11776 adjust_cost function. Only put bits of code into arm_adjust_cost that
11777 are common across all cores. */
11778 static int
11779 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11781 rtx i_pat, d_pat;
11783 /* When generating Thumb-1 code, we want to place flag-setting operations
11784 close to a conditional branch which depends on them, so that we can
11785 omit the comparison. */
11786 if (TARGET_THUMB1
11787 && REG_NOTE_KIND (link) == 0
11788 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11789 && recog_memoized (dep) >= 0
11790 && get_attr_conds (dep) == CONDS_SET)
11791 return 0;
11793 if (current_tune->sched_adjust_cost != NULL)
11795 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11796 return cost;
11799 /* XXX Is this strictly true? */
11800 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11801 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11802 return 0;
11804 /* Call insns don't incur a stall, even if they follow a load. */
11805 if (REG_NOTE_KIND (link) == 0
11806 && CALL_P (insn))
11807 return 1;
11809 if ((i_pat = single_set (insn)) != NULL
11810 && MEM_P (SET_SRC (i_pat))
11811 && (d_pat = single_set (dep)) != NULL
11812 && MEM_P (SET_DEST (d_pat)))
11814 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11815 /* This is a load after a store; there is no conflict if the load reads
11816 from a cached area. Assume that loads from the stack and from the
11817 constant pool are cached, and that others will miss. This is a
11818 hack. */
11820 if ((GET_CODE (src_mem) == SYMBOL_REF
11821 && CONSTANT_POOL_ADDRESS_P (src_mem))
11822 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11823 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11824 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11825 return 1;
11828 return cost;
11832 arm_max_conditional_execute (void)
11834 return max_insns_skipped;
11837 static int
11838 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11840 if (TARGET_32BIT)
11841 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11842 else
11843 return (optimize > 0) ? 2 : 0;
11846 static int
11847 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11849 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11852 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11853 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11854 sequences of non-executed instructions in IT blocks probably take the same
11855 amount of time as executed instructions (and the IT instruction itself takes
11856 space in icache). The cost value here was experimentally determined to give good
11857 results on a popular embedded benchmark. */
11859 static int
11860 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11862 return (TARGET_32BIT && speed_p) ? 1
11863 : arm_default_branch_cost (speed_p, predictable_p);
11866 static bool fp_consts_inited = false;
11868 static REAL_VALUE_TYPE value_fp0;
11870 static void
11871 init_fp_table (void)
11873 REAL_VALUE_TYPE r;
11875 r = REAL_VALUE_ATOF ("0", DFmode);
11876 value_fp0 = r;
11877 fp_consts_inited = true;
11880 /* Return TRUE if rtx X is a valid immediate FP constant. */
11882 arm_const_double_rtx (rtx x)
11884 REAL_VALUE_TYPE r;
11886 if (!fp_consts_inited)
11887 init_fp_table ();
11889 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11890 if (REAL_VALUE_MINUS_ZERO (r))
11891 return 0;
11893 if (REAL_VALUES_EQUAL (r, value_fp0))
11894 return 1;
11896 return 0;
11899 /* VFPv3 has a fairly wide range of representable immediates, formed from
11900 "quarter-precision" floating-point values. These can be evaluated using this
11901 formula (with ^ for exponentiation):
11903 -1^s * n * 2^-r
11905 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11906 16 <= n <= 31 and 0 <= r <= 7.
11908 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11910 - A (most-significant) is the sign bit.
11911 - BCD are the exponent (encoded as r XOR 3).
11912 - EFGH are the mantissa (encoded as n - 16).
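/* As a worked example of the encoding above: +1.0 = (-1)^0 * 16 * 2^-4,
   so s = 0, n = 16 and r = 4, giving ABCDEFGH = 0 : (4 ^ 3) : (16 - 16)
   = 0 111 0000, i.e. the immediate byte 0x70.  */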
11915 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11916 fconst[sd] instruction, or -1 if X isn't suitable. */
11917 static int
11918 vfp3_const_double_index (rtx x)
11920 REAL_VALUE_TYPE r, m;
11921 int sign, exponent;
11922 unsigned HOST_WIDE_INT mantissa, mant_hi;
11923 unsigned HOST_WIDE_INT mask;
11924 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11925 bool fail;
11927 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11928 return -1;
11930 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11932 /* We can't represent these things, so detect them first. */
11933 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11934 return -1;
11936 /* Extract sign, exponent and mantissa. */
11937 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11938 r = real_value_abs (&r);
11939 exponent = REAL_EXP (&r);
11940 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11941 highest (sign) bit, with a fixed binary point at bit point_pos.
11942 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11943 bits for the mantissa, this may fail (low bits would be lost). */
11944 real_ldexp (&m, &r, point_pos - exponent);
11945 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11946 mantissa = w.elt (0);
11947 mant_hi = w.elt (1);
11949 /* If there are bits set in the low part of the mantissa, we can't
11950 represent this value. */
11951 if (mantissa != 0)
11952 return -1;
11954 /* Now make it so that mantissa contains the most-significant bits, and move
11955 the point_pos to indicate that the least-significant bits have been
11956 discarded. */
11957 point_pos -= HOST_BITS_PER_WIDE_INT;
11958 mantissa = mant_hi;
11960 /* We can permit four significant bits of mantissa only, plus a high bit
11961 which is always 1. */
11962 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11963 if ((mantissa & mask) != 0)
11964 return -1;
11966 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11967 mantissa >>= point_pos - 5;
11969 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11970 floating-point immediate zero with Neon using an integer-zero load, but
11971 that case is handled elsewhere.) */
11972 if (mantissa == 0)
11973 return -1;
11975 gcc_assert (mantissa >= 16 && mantissa <= 31);
11977 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11978 normalized significands are in the range [1, 2). (Our mantissa is shifted
11979 left 4 places at this point relative to normalized IEEE754 values). GCC
11980 internally uses [0.5, 1) (see real.c), so the exponent returned from
11981 REAL_EXP must be altered. */
11982 exponent = 5 - exponent;
11984 if (exponent < 0 || exponent > 7)
11985 return -1;
11987 /* Sign, mantissa and exponent are now in the correct form to plug into the
11988 formula described in the comment above. */
11989 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11992 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11994 vfp3_const_double_rtx (rtx x)
11996 if (!TARGET_VFP3)
11997 return 0;
11999 return vfp3_const_double_index (x) != -1;
12002 /* Recognize immediates which can be used in various Neon instructions. Legal
12003 immediates are described by the following table (for VMVN variants, the
12004 bitwise inverse of the constant shown is recognized. In either case, VMOV
12005 is output and the correct instruction to use for a given constant is chosen
12006 by the assembler). The constant shown is replicated across all elements of
12007 the destination vector.
12009 insn elems variant constant (binary)
12010 ---- ----- ------- -----------------
12011 vmov i32 0 00000000 00000000 00000000 abcdefgh
12012 vmov i32 1 00000000 00000000 abcdefgh 00000000
12013 vmov i32 2 00000000 abcdefgh 00000000 00000000
12014 vmov i32 3 abcdefgh 00000000 00000000 00000000
12015 vmov i16 4 00000000 abcdefgh
12016 vmov i16 5 abcdefgh 00000000
12017 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12018 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12019 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12020 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12021 vmvn i16 10 00000000 abcdefgh
12022 vmvn i16 11 abcdefgh 00000000
12023 vmov i32 12 00000000 00000000 abcdefgh 11111111
12024 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12025 vmov i32 14 00000000 abcdefgh 11111111 11111111
12026 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12027 vmov i8 16 abcdefgh
12028 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12029 eeeeeeee ffffffff gggggggg hhhhhhhh
12030 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12031 vmov f32 19 00000000 00000000 00000000 00000000
12033 For case 18, B = !b. Representable values are exactly those accepted by
12034 vfp3_const_double_index, but are output as floating-point numbers rather
12035 than indices.
12037 For case 19, we will change it to vmov.i32 when assembling.
12039 Variants 0-5 (inclusive) may also be used as immediates for the second
12040 operand of VORR/VBIC instructions.
12042 The INVERSE argument causes the bitwise inverse of the given operand to be
12043 recognized instead (used for recognizing legal immediates for the VAND/VORN
12044 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12045 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12046 output, rather than the real insns vbic/vorr).
12048 INVERSE makes no difference to the recognition of float vectors.
12050 The return value is the variant of immediate as shown in the above table, or
12051 -1 if the given value doesn't match any of the listed patterns.
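/* A worked example (hypothetical operand): a V4SImode CONST_VECTOR whose
   four elements are all 0x0000ab00 splats to the byte sequence
   00 ab 00 00 in each 4-byte group of the bytes[] array below, which
   matches variant 1 in the table above; the function would return 1 with
   *ELEMENTWIDTH set to 32 and *MODCONST set to (const_int 0x0000ab00).  */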
12053 static int
12054 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
12055 rtx *modconst, int *elementwidth)
12057 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12058 matches = 1; \
12059 for (i = 0; i < idx; i += (STRIDE)) \
12060 if (!(TEST)) \
12061 matches = 0; \
12062 if (matches) \
12064 immtype = (CLASS); \
12065 elsize = (ELSIZE); \
12066 break; \
12069 unsigned int i, elsize = 0, idx = 0, n_elts;
12070 unsigned int innersize;
12071 unsigned char bytes[16];
12072 int immtype = -1, matches;
12073 unsigned int invmask = inverse ? 0xff : 0;
12074 bool vector = GET_CODE (op) == CONST_VECTOR;
12076 if (vector)
12078 n_elts = CONST_VECTOR_NUNITS (op);
12079 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12081 else
12083 n_elts = 1;
12084 if (mode == VOIDmode)
12085 mode = DImode;
12086 innersize = GET_MODE_SIZE (mode);
12089 /* Vectors of float constants. */
12090 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12092 rtx el0 = CONST_VECTOR_ELT (op, 0);
12093 REAL_VALUE_TYPE r0;
12095 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12096 return -1;
12098 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12100 for (i = 1; i < n_elts; i++)
12102 rtx elt = CONST_VECTOR_ELT (op, i);
12103 REAL_VALUE_TYPE re;
12105 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12107 if (!REAL_VALUES_EQUAL (r0, re))
12108 return -1;
12111 if (modconst)
12112 *modconst = CONST_VECTOR_ELT (op, 0);
12114 if (elementwidth)
12115 *elementwidth = 0;
12117 if (el0 == CONST0_RTX (GET_MODE (el0)))
12118 return 19;
12119 else
12120 return 18;
12123 /* Splat vector constant out into a byte vector. */
12124 for (i = 0; i < n_elts; i++)
12126 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12127 unsigned HOST_WIDE_INT elpart;
12128 unsigned int part, parts;
12130 if (CONST_INT_P (el))
12132 elpart = INTVAL (el);
12133 parts = 1;
12135 else if (CONST_DOUBLE_P (el))
12137 elpart = CONST_DOUBLE_LOW (el);
12138 parts = 2;
12140 else
12141 gcc_unreachable ();
12143 for (part = 0; part < parts; part++)
12145 unsigned int byte;
12146 for (byte = 0; byte < innersize; byte++)
12148 bytes[idx++] = (elpart & 0xff) ^ invmask;
12149 elpart >>= BITS_PER_UNIT;
12151 if (CONST_DOUBLE_P (el))
12152 elpart = CONST_DOUBLE_HIGH (el);
12156 /* Sanity check. */
12157 gcc_assert (idx == GET_MODE_SIZE (mode));
12161 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12162 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12164 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12165 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12167 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12168 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12170 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12171 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12173 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12175 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12177 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12178 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12180 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12181 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12183 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12184 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12186 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12187 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12189 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12191 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12193 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12194 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12196 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12197 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12199 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12200 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12202 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12203 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12205 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12207 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12208 && bytes[i] == bytes[(i + 8) % idx]);
12210 while (0);
12212 if (immtype == -1)
12213 return -1;
12215 if (elementwidth)
12216 *elementwidth = elsize;
12218 if (modconst)
12220 unsigned HOST_WIDE_INT imm = 0;
12222 /* Un-invert bytes of recognized vector, if necessary. */
12223 if (invmask != 0)
12224 for (i = 0; i < idx; i++)
12225 bytes[i] ^= invmask;
12227 if (immtype == 17)
12229 /* FIXME: Broken on 32-bit H_W_I hosts. */
12230 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12232 for (i = 0; i < 8; i++)
12233 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12234 << (i * BITS_PER_UNIT);
12236 *modconst = GEN_INT (imm);
12238 else
12240 unsigned HOST_WIDE_INT imm = 0;
12242 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12243 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12245 *modconst = GEN_INT (imm);
12249 return immtype;
12250 #undef CHECK
12253 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12254 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12255 float elements), and a modified constant (whatever should be output for a
12256 VMOV) in *MODCONST. */
12259 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12260 rtx *modconst, int *elementwidth)
12262 rtx tmpconst;
12263 int tmpwidth;
12264 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12266 if (retval == -1)
12267 return 0;
12269 if (modconst)
12270 *modconst = tmpconst;
12272 if (elementwidth)
12273 *elementwidth = tmpwidth;
12275 return 1;
12278 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12279 the immediate is valid, write a constant suitable for using as an operand
12280 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12281 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12284 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12285 rtx *modconst, int *elementwidth)
12287 rtx tmpconst;
12288 int tmpwidth;
12289 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12291 if (retval < 0 || retval > 5)
12292 return 0;
12294 if (modconst)
12295 *modconst = tmpconst;
12297 if (elementwidth)
12298 *elementwidth = tmpwidth;
12300 return 1;
12303 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12304 the immediate is valid, write a constant suitable for using as an operand
12305 to VSHR/VSHL to *MODCONST and the corresponding element width to
12306 *ELEMENTWIDTH. ISLEFTSHIFT is true for a left shift and false for a right
12307 shift; the two forms have different immediate-range limitations.
12310 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12311 rtx *modconst, int *elementwidth,
12312 bool isleftshift)
12314 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12315 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12316 unsigned HOST_WIDE_INT last_elt = 0;
12317 unsigned HOST_WIDE_INT maxshift;
12319 /* Split vector constant out into a byte vector. */
12320 for (i = 0; i < n_elts; i++)
12322 rtx el = CONST_VECTOR_ELT (op, i);
12323 unsigned HOST_WIDE_INT elpart;
12325 if (CONST_INT_P (el))
12326 elpart = INTVAL (el);
12327 else if (CONST_DOUBLE_P (el))
12328 return 0;
12329 else
12330 gcc_unreachable ();
12332 if (i != 0 && elpart != last_elt)
12333 return 0;
12335 last_elt = elpart;
12338 /* Shift less than element size. */
12339 maxshift = innersize * 8;
12341 if (isleftshift)
12343 /* Left shift immediate value can be from 0 to <size>-1. */
12344 if (last_elt >= maxshift)
12345 return 0;
12347 else
12349 /* Right shift immediate value can be from 1 to <size>. */
12350 if (last_elt == 0 || last_elt > maxshift)
12351 return 0;
12354 if (elementwidth)
12355 *elementwidth = innersize * 8;
12357 if (modconst)
12358 *modconst = CONST_VECTOR_ELT (op, 0);
12360 return 1;
12363 /* Return a string suitable for output of Neon immediate logic operation
12364 MNEM. */
12366 char *
12367 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12368 int inverse, int quad)
12370 int width, is_valid;
12371 static char templ[40];
12373 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12375 gcc_assert (is_valid != 0);
12377 if (quad)
12378 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12379 else
12380 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12382 return templ;
12385 /* Return a string suitable for output of Neon immediate shift operation
12386 (VSHR or VSHL) MNEM. */
12388 char *
12389 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12390 enum machine_mode mode, int quad,
12391 bool isleftshift)
12393 int width, is_valid;
12394 static char templ[40];
12396 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12397 gcc_assert (is_valid != 0);
12399 if (quad)
12400 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12401 else
12402 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12404 return templ;
12407 /* Output a sequence of pairwise operations to implement a reduction.
12408 NOTE: We do "too much work" here, because pairwise operations work on two
12409 registers-worth of operands in one go. Unfortunately it does not seem possible
12410 to exploit those extra calculations to do the full operation in fewer steps.
12411 Although all vector elements of the result but the first are ignored, we
12412 actually calculate the same result in each of the elements. An alternative
12413 such as initially loading a vector with zero to use as each of the second
12414 operands would use up an additional register and take an extra instruction,
12415 for no particular gain. */
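/* For illustration (hypothetical element values): reducing {a, b, c, d}
   with a pairwise-add REDUC runs the loop below twice.  The first step
   computes {a+b, c+d, a+b, c+d}, the second {a+b+c+d, ...}; every element
   ends up holding the full sum, but only element 0 of OP0 is needed.  */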
12417 void
12418 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12419 rtx (*reduc) (rtx, rtx, rtx))
12421 enum machine_mode inner = GET_MODE_INNER (mode);
12422 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12423 rtx tmpsum = op1;
12425 for (i = parts / 2; i >= 1; i /= 2)
12427 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12428 emit_insn (reduc (dest, tmpsum, tmpsum));
12429 tmpsum = dest;
12433 /* If VALS is a vector constant that can be loaded into a register
12434 using VDUP, generate instructions to do so and return an RTX to
12435 assign to the register. Otherwise return NULL_RTX. */
12437 static rtx
12438 neon_vdup_constant (rtx vals)
12440 enum machine_mode mode = GET_MODE (vals);
12441 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12442 int n_elts = GET_MODE_NUNITS (mode);
12443 bool all_same = true;
12444 rtx x;
12445 int i;
12447 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12448 return NULL_RTX;
12450 for (i = 0; i < n_elts; ++i)
12452 x = XVECEXP (vals, 0, i);
12453 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12454 all_same = false;
12457 if (!all_same)
12458 /* The elements are not all the same. We could handle repeating
12459 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12460 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12461 vdup.i16). */
12462 return NULL_RTX;
12464 /* We can load this constant by using VDUP and a constant in a
12465 single ARM register. This will be cheaper than a vector
12466 load. */
12468 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12469 return gen_rtx_VEC_DUPLICATE (mode, x);
12472 /* Generate code to load VALS, which is a PARALLEL containing only
12473 constants (for vec_init) or CONST_VECTOR, efficiently into a
12474 register. Returns an RTX to copy into the register, or NULL_RTX
12475 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12478 neon_make_constant (rtx vals)
12480 enum machine_mode mode = GET_MODE (vals);
12481 rtx target;
12482 rtx const_vec = NULL_RTX;
12483 int n_elts = GET_MODE_NUNITS (mode);
12484 int n_const = 0;
12485 int i;
12487 if (GET_CODE (vals) == CONST_VECTOR)
12488 const_vec = vals;
12489 else if (GET_CODE (vals) == PARALLEL)
12491 /* A CONST_VECTOR must contain only CONST_INTs and
12492 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12493 Only store valid constants in a CONST_VECTOR. */
12494 for (i = 0; i < n_elts; ++i)
12496 rtx x = XVECEXP (vals, 0, i);
12497 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12498 n_const++;
12500 if (n_const == n_elts)
12501 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12503 else
12504 gcc_unreachable ();
12506 if (const_vec != NULL
12507 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12508 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12509 return const_vec;
12510 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12511 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12512 pipeline cycle; creating the constant takes one or two ARM
12513 pipeline cycles. */
12514 return target;
12515 else if (const_vec != NULL_RTX)
12516 /* Load from constant pool. On Cortex-A8 this takes two cycles
12517 (for either double or quad vectors). We cannot take advantage
12518 of single-cycle VLD1 because we need a PC-relative addressing
12519 mode. */
12520 return const_vec;
12521 else
12522 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12523 We cannot construct an initializer. */
12524 return NULL_RTX;
12527 /* Initialize vector TARGET to VALS. */
12529 void
12530 neon_expand_vector_init (rtx target, rtx vals)
12532 enum machine_mode mode = GET_MODE (target);
12533 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12534 int n_elts = GET_MODE_NUNITS (mode);
12535 int n_var = 0, one_var = -1;
12536 bool all_same = true;
12537 rtx x, mem;
12538 int i;
12540 for (i = 0; i < n_elts; ++i)
12542 x = XVECEXP (vals, 0, i);
12543 if (!CONSTANT_P (x))
12544 ++n_var, one_var = i;
12546 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12547 all_same = false;
12550 if (n_var == 0)
12552 rtx constant = neon_make_constant (vals);
12553 if (constant != NULL_RTX)
12555 emit_move_insn (target, constant);
12556 return;
12560 /* Splat a single non-constant element if we can. */
12561 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12563 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12564 emit_insn (gen_rtx_SET (VOIDmode, target,
12565 gen_rtx_VEC_DUPLICATE (mode, x)));
12566 return;
12569 /* One field is non-constant. Load constant then overwrite varying
12570 field. This is more efficient than using the stack. */
12571 if (n_var == 1)
12573 rtx copy = copy_rtx (vals);
12574 rtx index = GEN_INT (one_var);
12576 /* Load constant part of vector, substitute neighboring value for
12577 varying element. */
12578 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12579 neon_expand_vector_init (target, copy);
12581 /* Insert variable. */
12582 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12583 switch (mode)
12585 case V8QImode:
12586 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12587 break;
12588 case V16QImode:
12589 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12590 break;
12591 case V4HImode:
12592 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12593 break;
12594 case V8HImode:
12595 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12596 break;
12597 case V2SImode:
12598 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12599 break;
12600 case V4SImode:
12601 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12602 break;
12603 case V2SFmode:
12604 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12605 break;
12606 case V4SFmode:
12607 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12608 break;
12609 case V2DImode:
12610 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12611 break;
12612 default:
12613 gcc_unreachable ();
12615 return;
12618 /* Construct the vector in memory one field at a time
12619 and load the whole vector. */
12620 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12621 for (i = 0; i < n_elts; i++)
12622 emit_move_insn (adjust_address_nv (mem, inner_mode,
12623 i * GET_MODE_SIZE (inner_mode)),
12624 XVECEXP (vals, 0, i));
12625 emit_move_insn (target, mem);
12628 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12629 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12630 reported source locations are bogus. */
12632 static void
12633 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12634 const char *err)
12636 HOST_WIDE_INT lane;
12638 gcc_assert (CONST_INT_P (operand));
12640 lane = INTVAL (operand);
12642 if (lane < low || lane >= high)
12643 error (err);
12646 /* Bounds-check lanes. */
12648 void
12649 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12651 bounds_check (operand, low, high, "lane out of range");
12654 /* Bounds-check constants. */
12656 void
12657 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12659 bounds_check (operand, low, high, "constant out of range");
12662 HOST_WIDE_INT
12663 neon_element_bits (enum machine_mode mode)
12665 if (mode == DImode)
12666 return GET_MODE_BITSIZE (mode);
12667 else
12668 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12672 /* Predicates for `match_operand' and `match_operator'. */
12674 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12675 WB is true if full writeback address modes are allowed and is false
12676 if limited writeback address modes (POST_INC and PRE_DEC) are
12677 allowed. */
12680 arm_coproc_mem_operand (rtx op, bool wb)
12682 rtx ind;
12684 /* Reject eliminable registers. */
12685 if (! (reload_in_progress || reload_completed || lra_in_progress)
12686 && ( reg_mentioned_p (frame_pointer_rtx, op)
12687 || reg_mentioned_p (arg_pointer_rtx, op)
12688 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12689 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12690 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12691 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12692 return FALSE;
12694 /* Constants are converted into offsets from labels. */
12695 if (!MEM_P (op))
12696 return FALSE;
12698 ind = XEXP (op, 0);
12700 if (reload_completed
12701 && (GET_CODE (ind) == LABEL_REF
12702 || (GET_CODE (ind) == CONST
12703 && GET_CODE (XEXP (ind, 0)) == PLUS
12704 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12705 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12706 return TRUE;
12708 /* Match: (mem (reg)). */
12709 if (REG_P (ind))
12710 return arm_address_register_rtx_p (ind, 0);
12712 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12713 acceptable in any case (subject to verification by
12714 arm_address_register_rtx_p). We need WB to be true to accept
12715 PRE_INC and POST_DEC. */
12716 if (GET_CODE (ind) == POST_INC
12717 || GET_CODE (ind) == PRE_DEC
12718 || (wb
12719 && (GET_CODE (ind) == PRE_INC
12720 || GET_CODE (ind) == POST_DEC)))
12721 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12723 if (wb
12724 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12725 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12726 && GET_CODE (XEXP (ind, 1)) == PLUS
12727 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12728 ind = XEXP (ind, 1);
12730 /* Match:
12731 (plus (reg)
12732 (const)). */
12733 if (GET_CODE (ind) == PLUS
12734 && REG_P (XEXP (ind, 0))
12735 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12736 && CONST_INT_P (XEXP (ind, 1))
12737 && INTVAL (XEXP (ind, 1)) > -1024
12738 && INTVAL (XEXP (ind, 1)) < 1024
12739 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12740 return TRUE;
12742 return FALSE;
12745 /* Return TRUE if OP is a memory operand which we can load or store a vector
12746 to/from. TYPE is one of the following values:
12747 0 - Vector load/store (vldr)
12748 1 - Core registers (ldm)
12749 2 - Element/structure loads (vld1)
12752 neon_vector_mem_operand (rtx op, int type, bool strict)
12754 rtx ind;
12756 /* Reject eliminable registers. */
12757 if (! (reload_in_progress || reload_completed)
12758 && ( reg_mentioned_p (frame_pointer_rtx, op)
12759 || reg_mentioned_p (arg_pointer_rtx, op)
12760 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12761 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12762 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12763 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12764 return !strict;
12766 /* Constants are converted into offsets from labels. */
12767 if (!MEM_P (op))
12768 return FALSE;
12770 ind = XEXP (op, 0);
12772 if (reload_completed
12773 && (GET_CODE (ind) == LABEL_REF
12774 || (GET_CODE (ind) == CONST
12775 && GET_CODE (XEXP (ind, 0)) == PLUS
12776 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12777 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12778 return TRUE;
12780 /* Match: (mem (reg)). */
12781 if (REG_P (ind))
12782 return arm_address_register_rtx_p (ind, 0);
12784 /* Allow post-increment with Neon registers. */
12785 if ((type != 1 && GET_CODE (ind) == POST_INC)
12786 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12787 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12789 /* FIXME: vld1 allows register post-modify. */
12791 /* Match:
12792 (plus (reg)
12793 (const)). */
12794 if (type == 0
12795 && GET_CODE (ind) == PLUS
12796 && REG_P (XEXP (ind, 0))
12797 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12798 && CONST_INT_P (XEXP (ind, 1))
12799 && INTVAL (XEXP (ind, 1)) > -1024
12800 /* For quad modes, we restrict the constant offset to be slightly less
12801 than what the instruction format permits. We have no such constraint
12802 on double mode offsets. (This must match arm_legitimate_index_p.) */
12803 && (INTVAL (XEXP (ind, 1))
12804 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12805 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12806 return TRUE;
12808 return FALSE;
12811 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12812 type. */
12814 neon_struct_mem_operand (rtx op)
12816 rtx ind;
12818 /* Reject eliminable registers. */
12819 if (! (reload_in_progress || reload_completed)
12820 && ( reg_mentioned_p (frame_pointer_rtx, op)
12821 || reg_mentioned_p (arg_pointer_rtx, op)
12822 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12823 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12824 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12825 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12826 return FALSE;
12828 /* Constants are converted into offsets from labels. */
12829 if (!MEM_P (op))
12830 return FALSE;
12832 ind = XEXP (op, 0);
12834 if (reload_completed
12835 && (GET_CODE (ind) == LABEL_REF
12836 || (GET_CODE (ind) == CONST
12837 && GET_CODE (XEXP (ind, 0)) == PLUS
12838 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12839 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12840 return TRUE;
12842 /* Match: (mem (reg)). */
12843 if (REG_P (ind))
12844 return arm_address_register_rtx_p (ind, 0);
12846 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12847 if (GET_CODE (ind) == POST_INC
12848 || GET_CODE (ind) == PRE_DEC)
12849 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12851 return FALSE;
12854 /* Return true if X is a register that will be eliminated later on. */
12856 arm_eliminable_register (rtx x)
12858 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12859 || REGNO (x) == ARG_POINTER_REGNUM
12860 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12861 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12864 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12865 coprocessor registers. Otherwise return NO_REGS. */
12867 enum reg_class
12868 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12870 if (mode == HFmode)
12872 if (!TARGET_NEON_FP16)
12873 return GENERAL_REGS;
12874 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12875 return NO_REGS;
12876 return GENERAL_REGS;
12879 /* The neon move patterns handle all legitimate vector and struct
12880 addresses. */
12881 if (TARGET_NEON
12882 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12883 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12884 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12885 || VALID_NEON_STRUCT_MODE (mode)))
12886 return NO_REGS;
12888 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12889 return NO_REGS;
12891 return GENERAL_REGS;
12894 /* Values which must be returned in the most-significant end of the return
12895 register. */
12897 static bool
12898 arm_return_in_msb (const_tree valtype)
12900 return (TARGET_AAPCS_BASED
12901 && BYTES_BIG_ENDIAN
12902 && (AGGREGATE_TYPE_P (valtype)
12903 || TREE_CODE (valtype) == COMPLEX_TYPE
12904 || FIXED_POINT_TYPE_P (valtype)));
12907 /* Return TRUE if X references a SYMBOL_REF. */
12909 symbol_mentioned_p (rtx x)
12911 const char * fmt;
12912 int i;
12914 if (GET_CODE (x) == SYMBOL_REF)
12915 return 1;
12917 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12918 are constant offsets, not symbols. */
12919 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12920 return 0;
12922 fmt = GET_RTX_FORMAT (GET_CODE (x));
12924 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12926 if (fmt[i] == 'E')
12928 int j;
12930 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12931 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12932 return 1;
12934 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12935 return 1;
12938 return 0;
12941 /* Return TRUE if X references a LABEL_REF. */
12943 label_mentioned_p (rtx x)
12945 const char * fmt;
12946 int i;
12948 if (GET_CODE (x) == LABEL_REF)
12949 return 1;
12951 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12952 instruction, but they are constant offsets, not symbols. */
12953 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12954 return 0;
12956 fmt = GET_RTX_FORMAT (GET_CODE (x));
12957 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12959 if (fmt[i] == 'E')
12961 int j;
12963 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12964 if (label_mentioned_p (XVECEXP (x, i, j)))
12965 return 1;
12967 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12968 return 1;
12971 return 0;
12975 tls_mentioned_p (rtx x)
12977 switch (GET_CODE (x))
12979 case CONST:
12980 return tls_mentioned_p (XEXP (x, 0));
12982 case UNSPEC:
12983 if (XINT (x, 1) == UNSPEC_TLS)
12984 return 1;
12986 default:
12987 return 0;
12991 /* Must not copy any rtx that uses a pc-relative address. */
12993 static int
12994 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
12996 if (GET_CODE (*x) == UNSPEC
12997 && (XINT (*x, 1) == UNSPEC_PIC_BASE
12998 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
12999 return 1;
13000 return 0;
13003 static bool
13004 arm_cannot_copy_insn_p (rtx insn)
13006 /* The tls call insn cannot be copied, as it is paired with a data
13007 word. */
13008 if (recog_memoized (insn) == CODE_FOR_tlscall)
13009 return true;
13011 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
13014 enum rtx_code
13015 minmax_code (rtx x)
13017 enum rtx_code code = GET_CODE (x);
13019 switch (code)
13021 case SMAX:
13022 return GE;
13023 case SMIN:
13024 return LE;
13025 case UMIN:
13026 return LEU;
13027 case UMAX:
13028 return GEU;
13029 default:
13030 gcc_unreachable ();
13034 /* Match pair of min/max operators that can be implemented via usat/ssat. */
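/* For example (hypothetical bounds): LO_BOUND = 0 and HI_BOUND = 255
   describe an unsigned saturation to 8 bits (*MASK = 8, *SIGNED_SAT =
   false), while LO_BOUND = -128 and HI_BOUND = 127 describe a signed
   saturation to 8 bits (*MASK = 8, *SIGNED_SAT = true).  */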
13036 bool
13037 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13038 int *mask, bool *signed_sat)
13040 /* The high bound must be a power of two minus one. */
13041 int log = exact_log2 (INTVAL (hi_bound) + 1);
13042 if (log == -1)
13043 return false;
13045 /* The low bound is either zero (for usat) or one less than the
13046 negation of the high bound (for ssat). */
13047 if (INTVAL (lo_bound) == 0)
13049 if (mask)
13050 *mask = log;
13051 if (signed_sat)
13052 *signed_sat = false;
13054 return true;
13057 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13059 if (mask)
13060 *mask = log + 1;
13061 if (signed_sat)
13062 *signed_sat = true;
13064 return true;
13067 return false;
13070 /* Return 1 if memory locations are adjacent. */
13072 adjacent_mem_locations (rtx a, rtx b)
13074 /* We don't guarantee to preserve the order of these memory refs. */
13075 if (volatile_refs_p (a) || volatile_refs_p (b))
13076 return 0;
13078 if ((REG_P (XEXP (a, 0))
13079 || (GET_CODE (XEXP (a, 0)) == PLUS
13080 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13081 && (REG_P (XEXP (b, 0))
13082 || (GET_CODE (XEXP (b, 0)) == PLUS
13083 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13085 HOST_WIDE_INT val0 = 0, val1 = 0;
13086 rtx reg0, reg1;
13087 int val_diff;
13089 if (GET_CODE (XEXP (a, 0)) == PLUS)
13091 reg0 = XEXP (XEXP (a, 0), 0);
13092 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13094 else
13095 reg0 = XEXP (a, 0);
13097 if (GET_CODE (XEXP (b, 0)) == PLUS)
13099 reg1 = XEXP (XEXP (b, 0), 0);
13100 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13102 else
13103 reg1 = XEXP (b, 0);
13105 /* Don't accept any offset that will require multiple
13106 instructions to handle, since this would cause the
13107 arith_adjacentmem pattern to output an overlong sequence. */
13108 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13109 return 0;
13111 /* Don't allow an eliminable register: register elimination can make
13112 the offset too large. */
13113 if (arm_eliminable_register (reg0))
13114 return 0;
13116 val_diff = val1 - val0;
13118 if (arm_ld_sched)
13120 /* If the target has load delay slots, then there's no benefit
13121 to using an ldm instruction unless the offset is zero and
13122 we are optimizing for size. */
13123 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13124 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13125 && (val_diff == 4 || val_diff == -4));
13128 return ((REGNO (reg0) == REGNO (reg1))
13129 && (val_diff == 4 || val_diff == -4));
13132 return 0;
13135 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13136 for load operations, false for store operations. CONSECUTIVE is true
13137 if the register numbers in the operation must be consecutive in the register
13138 bank. RETURN_PC is true if the value is to be loaded into the PC.
13139 The pattern we are trying to match for load is:
13140 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13141 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13144 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13146 where
13147 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13148 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13149 3. If consecutive is TRUE, then for kth register being loaded,
13150 REGNO (R_dk) = REGNO (R_d0) + k.
13151 The pattern for store is similar. */
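/* As a sketch (hypothetical register numbers), a two-register LDMIA in
   SImode would be matched from a PARALLEL such as
     [(set (reg:SI 4) (mem:SI (reg:SI 3)))
      (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 3) (const_int 4))))]
   with LOAD true, MODE SImode, CONSECUTIVE false and RETURN_PC false.  */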
13152 bool
13153 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
13154 bool consecutive, bool return_pc)
13156 HOST_WIDE_INT count = XVECLEN (op, 0);
13157 rtx reg, mem, addr;
13158 unsigned regno;
13159 unsigned first_regno;
13160 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13161 rtx elt;
13162 bool addr_reg_in_reglist = false;
13163 bool update = false;
13164 int reg_increment;
13165 int offset_adj;
13166 int regs_per_val;
13168 /* If not in SImode, then registers must be consecutive
13169 (e.g., VLDM instructions for DFmode). */
13170 gcc_assert ((mode == SImode) || consecutive);
13171 /* Setting return_pc for stores is illegal. */
13172 gcc_assert (!return_pc || load);
13174 /* Set up the increments and the regs per val based on the mode. */
13175 reg_increment = GET_MODE_SIZE (mode);
13176 regs_per_val = reg_increment / 4;
13177 offset_adj = return_pc ? 1 : 0;
13179 if (count <= 1
13180 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13181 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13182 return false;
13184 /* Check if this is a write-back. */
13185 elt = XVECEXP (op, 0, offset_adj);
13186 if (GET_CODE (SET_SRC (elt)) == PLUS)
13188 i++;
13189 base = 1;
13190 update = true;
13192 /* The offset adjustment must be the number of registers being
13193 popped times the size of a single register. */
13194 if (!REG_P (SET_DEST (elt))
13195 || !REG_P (XEXP (SET_SRC (elt), 0))
13196 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13197 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13198 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13199 ((count - 1 - offset_adj) * reg_increment))
13200 return false;
13203 i = i + offset_adj;
13204 base = base + offset_adj;
13205 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13206 success depends on the type: VLDM can do just one reg,
13207 LDM must do at least two. */
13208 if ((count <= i) && (mode == SImode))
13209 return false;
13211 elt = XVECEXP (op, 0, i - 1);
13212 if (GET_CODE (elt) != SET)
13213 return false;
13215 if (load)
13217 reg = SET_DEST (elt);
13218 mem = SET_SRC (elt);
13220 else
13222 reg = SET_SRC (elt);
13223 mem = SET_DEST (elt);
13226 if (!REG_P (reg) || !MEM_P (mem))
13227 return false;
13229 regno = REGNO (reg);
13230 first_regno = regno;
13231 addr = XEXP (mem, 0);
13232 if (GET_CODE (addr) == PLUS)
13234 if (!CONST_INT_P (XEXP (addr, 1)))
13235 return false;
13237 offset = INTVAL (XEXP (addr, 1));
13238 addr = XEXP (addr, 0);
13241 if (!REG_P (addr))
13242 return false;
13244 /* Don't allow SP to be loaded unless it is also the base register. It
13245 guarantees that SP is reset correctly when an LDM instruction
13246 is interrupted. Otherwise, we might end up with a corrupt stack. */
13247 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13248 return false;
13250 for (; i < count; i++)
13252 elt = XVECEXP (op, 0, i);
13253 if (GET_CODE (elt) != SET)
13254 return false;
13256 if (load)
13258 reg = SET_DEST (elt);
13259 mem = SET_SRC (elt);
13261 else
13263 reg = SET_SRC (elt);
13264 mem = SET_DEST (elt);
13267 if (!REG_P (reg)
13268 || GET_MODE (reg) != mode
13269 || REGNO (reg) <= regno
13270 || (consecutive
13271 && (REGNO (reg) !=
13272 (unsigned int) (first_regno + regs_per_val * (i - base))))
13273 /* Don't allow SP to be loaded unless it is also the base register. It
13274 guarantees that SP is reset correctly when an LDM instruction
13275 is interrupted. Otherwise, we might end up with a corrupt stack. */
13276 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13277 || !MEM_P (mem)
13278 || GET_MODE (mem) != mode
13279 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13280 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13281 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13282 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13283 offset + (i - base) * reg_increment))
13284 && (!REG_P (XEXP (mem, 0))
13285 || offset + (i - base) * reg_increment != 0)))
13286 return false;
13288 regno = REGNO (reg);
13289 if (regno == REGNO (addr))
13290 addr_reg_in_reglist = true;
13293 if (load)
13295 if (update && addr_reg_in_reglist)
13296 return false;
13298 /* For Thumb-1, the address register is always modified - either by write-back
13299 or by an explicit load. If the pattern does not describe an update,
13300 then the address register must be in the list of loaded registers. */
13301 if (TARGET_THUMB1)
13302 return update || addr_reg_in_reglist;
13305 return true;
13308 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13309 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13310 instruction. ADD_OFFSET is nonzero if the base address register needs
13311 to be modified with an add instruction before we can use it. */
13313 static bool
13314 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13315 int nops, HOST_WIDE_INT add_offset)
13317 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13318 if the offset isn't small enough. The reason 2 ldrs are faster
13319 is because these ARMs are able to do more than one cache access
13320 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13321 whilst the ARM8 has a double bandwidth cache. This means that
13322 these cores can do both an instruction fetch and a data fetch in
13323 a single cycle, so the trick of calculating the address into a
13324 scratch register (one of the result regs) and then doing a load
13325 multiple actually becomes slower (and no smaller in code size).
13326 That is the transformation
13328 ldr rd1, [rbase + offset]
13329 ldr rd2, [rbase + offset + 4]
13333 add rd1, rbase, offset
13334 ldmia rd1, {rd1, rd2}
13336 produces worse code -- '3 cycles + any stalls on rd2' instead of
13337 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13338 access per cycle, the first sequence could never complete in less
13339 than 6 cycles, whereas the ldm sequence would only take 5 and
13340 would make better use of sequential accesses if not hitting the
13341 cache.
13343 We cheat here and test 'arm_ld_sched' which we currently know to
13344 only be true for the ARM8, ARM9 and StrongARM. If this ever
13345 changes, then the test below needs to be reworked. */
13346 if (nops == 2 && arm_ld_sched && add_offset != 0)
13347 return false;
13349 /* XScale has load-store double instructions, but they have stricter
13350 alignment requirements than load-store multiple, so we cannot
13351 use them.
13353 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13354 the pipeline until completion.
13356 NREGS CYCLES
13357 1 3
13358 2 4
13359 3 5
13360 4 6
13362 An ldr instruction takes 1-3 cycles, but does not block the
13363 pipeline.
13365 NREGS CYCLES
13366 1 1-3
13367 2 2-6
13368 3 3-9
13369 4 4-12
13371 Best case ldr will always win. However, the more ldr instructions
13372 we issue, the less likely we are to be able to schedule them well.
13373 Using ldr instructions also increases code size.
13375 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13376 for counts of 3 or 4 regs. */
13377 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13378 return false;
13379 return true;
13382 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13383 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13384 an array ORDER which describes the sequence to use when accessing the
13385 offsets that produces an ascending order. In this sequence, each
13386 offset must be larger by exactly 4 than the previous one. ORDER[0]
13387 must have been filled in with the lowest offset by the caller.
13388 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13389 we use to verify that ORDER produces an ascending order of registers.
13390 Return true if it was possible to construct such an order, false if
13391 not. */
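/* A worked example (hypothetical inputs): with NOPS = 4,
   UNSORTED_OFFSETS = {8, 0, 4, 12} and ORDER[0] = 1 (the index of the
   lowest offset), the loop below fills ORDER with {1, 2, 0, 3}, i.e. the
   offsets 0, 4, 8, 12 visited in ascending steps of 4.  */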
13393 static bool
13394 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13395 int *unsorted_regs)
13397 int i;
13398 for (i = 1; i < nops; i++)
13400 int j;
13402 order[i] = order[i - 1];
13403 for (j = 0; j < nops; j++)
13404 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13406 /* We must find exactly one offset that is higher than the
13407 previous one by 4. */
13408 if (order[i] != order[i - 1])
13409 return false;
13410 order[i] = j;
13412 if (order[i] == order[i - 1])
13413 return false;
13414 /* The register numbers must be ascending. */
13415 if (unsorted_regs != NULL
13416 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13417 return false;
13419 return true;
13422 /* Used to determine in a peephole whether a sequence of load
13423 instructions can be changed into a load-multiple instruction.
13424 NOPS is the number of separate load instructions we are examining. The
13425 first NOPS entries in OPERANDS are the destination registers, the
13426 next NOPS entries are memory operands. If this function is
13427 successful, *BASE is set to the common base register of the memory
13428 accesses; *LOAD_OFFSET is set to the first memory location's offset
13429 from that base register.
13430 REGS is an array filled in with the destination register numbers.
13431 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13432 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13433 the sequence of registers in REGS matches the loads from ascending memory
13434 locations, and the function verifies that the register numbers are
13435 themselves ascending. If CHECK_REGS is false, the register numbers
13436 are stored in the order they are found in the operands. */
13437 static int
13438 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13439 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13441 int unsorted_regs[MAX_LDM_STM_OPS];
13442 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13443 int order[MAX_LDM_STM_OPS];
13444 rtx base_reg_rtx = NULL;
13445 int base_reg = -1;
13446 int i, ldm_case;
13448 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13449 easily extended if required. */
13450 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13452 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13454 /* Loop over the operands and check that the memory references are
13455 suitable (i.e. immediate offsets from the same base register). At
13456 the same time, extract the target register, and the memory
13457 offsets. */
13458 for (i = 0; i < nops; i++)
13460 rtx reg;
13461 rtx offset;
13463 /* Convert a subreg of a mem into the mem itself. */
13464 if (GET_CODE (operands[nops + i]) == SUBREG)
13465 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13467 gcc_assert (MEM_P (operands[nops + i]));
13469 /* Don't reorder volatile memory references; it doesn't seem worth
13470 looking for the case where the order is ok anyway. */
13471 if (MEM_VOLATILE_P (operands[nops + i]))
13472 return 0;
13474 offset = const0_rtx;
13476 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13477 || (GET_CODE (reg) == SUBREG
13478 && REG_P (reg = SUBREG_REG (reg))))
13479 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13480 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13481 || (GET_CODE (reg) == SUBREG
13482 && REG_P (reg = SUBREG_REG (reg))))
13483 && (CONST_INT_P (offset
13484 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13486 if (i == 0)
13488 base_reg = REGNO (reg);
13489 base_reg_rtx = reg;
13490 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13491 return 0;
13493 else if (base_reg != (int) REGNO (reg))
13494 /* Not addressed from the same base register. */
13495 return 0;
13497 unsorted_regs[i] = (REG_P (operands[i])
13498 ? REGNO (operands[i])
13499 : REGNO (SUBREG_REG (operands[i])));
13501 /* If it isn't an integer register, or if it overwrites the
13502 base register but isn't the last insn in the list, then
13503 we can't do this. */
13504 if (unsorted_regs[i] < 0
13505 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13506 || unsorted_regs[i] > 14
13507 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13508 return 0;
13510 /* Don't allow SP to be loaded unless it is also the base
13511 register. It guarantees that SP is reset correctly when
13512 an LDM instruction is interrupted. Otherwise, we might
13513 end up with a corrupt stack. */
13514 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13515 return 0;
13517 unsorted_offsets[i] = INTVAL (offset);
13518 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13519 order[0] = i;
13521 else
13522 /* Not a suitable memory address. */
13523 return 0;
13526 /* All the useful information has now been extracted from the
13527 operands into unsorted_regs and unsorted_offsets; additionally,
13528 order[0] has been set to the lowest offset in the list. Sort
13529 the offsets into order, verifying that they are adjacent, and
13530 check that the register numbers are ascending. */
13531 if (!compute_offset_order (nops, unsorted_offsets, order,
13532 check_regs ? unsorted_regs : NULL))
13533 return 0;
13535 if (saved_order)
13536 memcpy (saved_order, order, sizeof order);
13538 if (base)
13540 *base = base_reg;
13542 for (i = 0; i < nops; i++)
13543 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13545 *load_offset = unsorted_offsets[order[0]];
13548 if (TARGET_THUMB1
13549 && !peep2_reg_dead_p (nops, base_reg_rtx))
13550 return 0;
13552 if (unsorted_offsets[order[0]] == 0)
13553 ldm_case = 1; /* ldmia */
13554 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13555 ldm_case = 2; /* ldmib */
13556 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13557 ldm_case = 3; /* ldmda */
13558 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13559 ldm_case = 4; /* ldmdb */
13560 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13561 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13562 ldm_case = 5;
13563 else
13564 return 0;
13566 if (!multiple_operation_profitable_p (false, nops,
13567 ldm_case == 5
13568 ? unsorted_offsets[order[0]] : 0))
13569 return 0;
13571 return ldm_case;
13574 /* Used to determine in a peephole whether a sequence of store instructions can
13575 be changed into a store-multiple instruction.
13576 NOPS is the number of separate store instructions we are examining.
13577 NOPS_TOTAL is the total number of instructions recognized by the peephole
13578 pattern.
13579 The first NOPS entries in OPERANDS are the source registers, the next
13580 NOPS entries are memory operands. If this function is successful, *BASE is
13581 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13582 to the first memory location's offset from that base register. REGS is an
13583 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13584 likewise filled with the corresponding rtx's.
13585 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13586 numbers to an ascending order of stores.
13587 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13588 from ascending memory locations, and the function verifies that the register
13589 numbers are themselves ascending. If CHECK_REGS is false, the register
13590 numbers are stored in the order they are found in the operands. */
13591 static int
13592 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13593 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13594 HOST_WIDE_INT *load_offset, bool check_regs)
13596 int unsorted_regs[MAX_LDM_STM_OPS];
13597 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13598 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13599 int order[MAX_LDM_STM_OPS];
13600 int base_reg = -1;
13601 rtx base_reg_rtx = NULL;
13602 int i, stm_case;
13604 /* Write back of base register is currently only supported for Thumb 1. */
13605 int base_writeback = TARGET_THUMB1;
13607 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13608 easily extended if required. */
13609 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13611 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13613 /* Loop over the operands and check that the memory references are
13614 suitable (i.e. immediate offsets from the same base register). At
13615 the same time, extract the target register, and the memory
13616 offsets. */
13617 for (i = 0; i < nops; i++)
13619 rtx reg;
13620 rtx offset;
13622 /* Convert a subreg of a mem into the mem itself. */
13623 if (GET_CODE (operands[nops + i]) == SUBREG)
13624 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13626 gcc_assert (MEM_P (operands[nops + i]));
13628 /* Don't reorder volatile memory references; it doesn't seem worth
13629 looking for the case where the order is ok anyway. */
13630 if (MEM_VOLATILE_P (operands[nops + i]))
13631 return 0;
13633 offset = const0_rtx;
13635 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13636 || (GET_CODE (reg) == SUBREG
13637 && REG_P (reg = SUBREG_REG (reg))))
13638 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13639 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13640 || (GET_CODE (reg) == SUBREG
13641 && REG_P (reg = SUBREG_REG (reg))))
13642 && (CONST_INT_P (offset
13643 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13645 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13646 ? operands[i] : SUBREG_REG (operands[i]));
13647 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13649 if (i == 0)
13651 base_reg = REGNO (reg);
13652 base_reg_rtx = reg;
13653 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13654 return 0;
13656 else if (base_reg != (int) REGNO (reg))
13657 /* Not addressed from the same base register. */
13658 return 0;
13660 /* If it isn't an integer register, then we can't do this. */
13661 if (unsorted_regs[i] < 0
13662 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13663 /* The effects are unpredictable if the base register is
13664 both updated and stored. */
13665 || (base_writeback && unsorted_regs[i] == base_reg)
13666 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13667 || unsorted_regs[i] > 14)
13668 return 0;
13670 unsorted_offsets[i] = INTVAL (offset);
13671 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13672 order[0] = i;
13674 else
13675 /* Not a suitable memory address. */
13676 return 0;
13679 /* All the useful information has now been extracted from the
13680 operands into unsorted_regs and unsorted_offsets; additionally,
13681 order[0] has been set to the lowest offset in the list. Sort
13682 the offsets into order, verifying that they are adjacent, and
13683 check that the register numbers are ascending. */
13684 if (!compute_offset_order (nops, unsorted_offsets, order,
13685 check_regs ? unsorted_regs : NULL))
13686 return 0;
13688 if (saved_order)
13689 memcpy (saved_order, order, sizeof order);
13691 if (base)
13693 *base = base_reg;
13695 for (i = 0; i < nops; i++)
13697 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13698 if (reg_rtxs)
13699 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13702 *load_offset = unsorted_offsets[order[0]];
13705 if (TARGET_THUMB1
13706 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13707 return 0;
13709 if (unsorted_offsets[order[0]] == 0)
13710 stm_case = 1; /* stmia */
13711 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13712 stm_case = 2; /* stmib */
13713 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13714 stm_case = 3; /* stmda */
13715 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13716 stm_case = 4; /* stmdb */
13717 else
13718 return 0;
13720 if (!multiple_operation_profitable_p (false, nops, 0))
13721 return 0;
13723 return stm_case;
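/* Similarly for stores: offsets 0, 4, 8 from a common base classify as
   stm_case 1 (stmia), offsets starting at 4 as stm_case 2 (stmib, ARM only),
   and offsets -12, -8, -4 as stm_case 4 (stmdb) on 32-bit targets
   (offsets illustrative). */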
13726 /* Routines for use in generating RTL. */
13728 /* Generate a load-multiple instruction. COUNT is the number of loads in
13729 the instruction; REGS and MEMS are arrays containing the operands.
13730 BASEREG is the base register to be used in addressing the memory operands.
13731 WBACK_OFFSET is nonzero if the instruction should update the base
13732 register. */
13734 static rtx
13735 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13736 HOST_WIDE_INT wback_offset)
13738 int i = 0, j;
13739 rtx result;
13741 if (!multiple_operation_profitable_p (false, count, 0))
13743 rtx seq;
13745 start_sequence ();
13747 for (i = 0; i < count; i++)
13748 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13750 if (wback_offset != 0)
13751 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13753 seq = get_insns ();
13754 end_sequence ();
13756 return seq;
13759 result = gen_rtx_PARALLEL (VOIDmode,
13760 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13761 if (wback_offset != 0)
13763 XVECEXP (result, 0, 0)
13764 = gen_rtx_SET (VOIDmode, basereg,
13765 plus_constant (Pmode, basereg, wback_offset));
13766 i = 1;
13767 count++;
13770 for (j = 0; i < count; i++, j++)
13771 XVECEXP (result, 0, i)
13772 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13774 return result;
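/* For a profitable case with writeback the PARALLEL built above places the
   base-register update first, followed by one SET per loaded register; with
   MEMS addressed at BASEREG and BASEREG+4 (as arm_gen_multiple_op below
   builds them) and WBACK_OFFSET of 8, it is roughly
     (parallel [(set (reg base) (plus (reg base) (const_int 8)))
                (set (reg r4) (mem (reg base)))
                (set (reg r5) (mem (plus (reg base) (const_int 4))))])
   with register numbers purely illustrative. */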
13777 /* Generate a store-multiple instruction. COUNT is the number of stores in
13778 the instruction; REGS and MEMS are arrays containing the operands.
13779 BASEREG is the base register to be used in addressing the memory operands.
13780 WBACK_OFFSET is nonzero if the instruction should update the base
13781 register. */
13783 static rtx
13784 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13785 HOST_WIDE_INT wback_offset)
13787 int i = 0, j;
13788 rtx result;
13790 if (GET_CODE (basereg) == PLUS)
13791 basereg = XEXP (basereg, 0);
13793 if (!multiple_operation_profitable_p (false, count, 0))
13795 rtx seq;
13797 start_sequence ();
13799 for (i = 0; i < count; i++)
13800 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13802 if (wback_offset != 0)
13803 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13805 seq = get_insns ();
13806 end_sequence ();
13808 return seq;
13811 result = gen_rtx_PARALLEL (VOIDmode,
13812 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13813 if (wback_offset != 0)
13815 XVECEXP (result, 0, 0)
13816 = gen_rtx_SET (VOIDmode, basereg,
13817 plus_constant (Pmode, basereg, wback_offset));
13818 i = 1;
13819 count++;
13822 for (j = 0; i < count; i++, j++)
13823 XVECEXP (result, 0, i)
13824 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13826 return result;
13829 /* Generate either a load-multiple or a store-multiple instruction. This
13830 function can be used in situations where we can start with a single MEM
13831 rtx and adjust its address upwards.
13832 COUNT is the number of operations in the instruction, not counting a
13833 possible update of the base register. REGS is an array containing the
13834 register operands.
13835 BASEREG is the base register to be used in addressing the memory operands,
13836 which are constructed from BASEMEM.
13837 WRITE_BACK specifies whether the generated instruction should include an
13838 update of the base register.
13839 OFFSETP is used to pass an offset to and from this function; this offset
13840 is not used when constructing the address (instead BASEMEM should have an
13841 appropriate offset in its address), it is used only for setting
13842 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13844 static rtx
13845 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13846 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13848 rtx mems[MAX_LDM_STM_OPS];
13849 HOST_WIDE_INT offset = *offsetp;
13850 int i;
13852 gcc_assert (count <= MAX_LDM_STM_OPS);
13854 if (GET_CODE (basereg) == PLUS)
13855 basereg = XEXP (basereg, 0);
13857 for (i = 0; i < count; i++)
13859 rtx addr = plus_constant (Pmode, basereg, i * 4);
13860 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13861 offset += 4;
13864 if (write_back)
13865 *offsetp = offset;
13867 if (is_load)
13868 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13869 write_back ? 4 * count : 0);
13870 else
13871 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13872 write_back ? 4 * count : 0);
13875 rtx
13876 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13877 rtx basemem, HOST_WIDE_INT *offsetp)
13879 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13880 offsetp);
13883 rtx
13884 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13885 rtx basemem, HOST_WIDE_INT *offsetp)
13887 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13888 offsetp);
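/* A typical use of these wrappers, as in the block-copy code below, is
     emit_insn (arm_gen_load_multiple (regnos, 4, src, TRUE, srcbase,
                                       &srcoffset));
   which loads four consecutive words, writes SRC back by 16 bytes and
   advances *OFFSETP (srcoffset here) by 16. */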
13891 /* Called from a peephole2 expander to turn a sequence of loads into an
13892 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13893 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13894 is true if we can reorder the registers because their subsequent uses
13895 are commutative.
13896 Returns true iff we could generate a new instruction. */
13898 bool
13899 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13901 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13902 rtx mems[MAX_LDM_STM_OPS];
13903 int i, j, base_reg;
13904 rtx base_reg_rtx;
13905 HOST_WIDE_INT offset;
13906 int write_back = FALSE;
13907 int ldm_case;
13908 rtx addr;
13910 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13911 &base_reg, &offset, !sort_regs);
13913 if (ldm_case == 0)
13914 return false;
13916 if (sort_regs)
13917 for (i = 0; i < nops - 1; i++)
13918 for (j = i + 1; j < nops; j++)
13919 if (regs[i] > regs[j])
13921 int t = regs[i];
13922 regs[i] = regs[j];
13923 regs[j] = t;
13925 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13927 if (TARGET_THUMB1)
13929 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13930 gcc_assert (ldm_case == 1 || ldm_case == 5);
13931 write_back = TRUE;
13934 if (ldm_case == 5)
13936 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13937 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13938 offset = 0;
13939 if (!TARGET_THUMB1)
13941 base_reg = regs[0];
13942 base_reg_rtx = newbase;
13946 for (i = 0; i < nops; i++)
13948 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13949 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13950 SImode, addr, 0);
13952 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13953 write_back ? offset + i * 4 : 0));
13954 return true;
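/* Note that for Thumb-1 the base register must be dead after the sequence
   and the generated load always uses writeback, e.g. "ldmia r3!, {r0, r1, r2}"
   (registers illustrative); for 32-bit targets ldm_case 5 first materializes
   base + offset in the lowest destination register. */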
13957 /* Called from a peephole2 expander to turn a sequence of stores into an
13958 STM instruction. OPERANDS are the operands found by the peephole matcher;
13959 NOPS indicates how many separate stores we are trying to combine.
13960 Returns true iff we could generate a new instruction. */
13962 bool
13963 gen_stm_seq (rtx *operands, int nops)
13965 int i;
13966 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13967 rtx mems[MAX_LDM_STM_OPS];
13968 int base_reg;
13969 rtx base_reg_rtx;
13970 HOST_WIDE_INT offset;
13971 int write_back = FALSE;
13972 int stm_case;
13973 rtx addr;
13974 bool base_reg_dies;
13976 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13977 mem_order, &base_reg, &offset, true);
13979 if (stm_case == 0)
13980 return false;
13982 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13984 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13985 if (TARGET_THUMB1)
13987 gcc_assert (base_reg_dies);
13988 write_back = TRUE;
13991 if (stm_case == 5)
13993 gcc_assert (base_reg_dies);
13994 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13995 offset = 0;
13998 addr = plus_constant (Pmode, base_reg_rtx, offset);
14000 for (i = 0; i < nops; i++)
14002 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14003 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14004 SImode, addr, 0);
14006 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14007 write_back ? offset + i * 4 : 0));
14008 return true;
14011 /* Called from a peephole2 expander to turn a sequence of stores that are
14012 preceded by constant loads into an STM instruction. OPERANDS are the
14013 operands found by the peephole matcher; NOPS indicates how many
14014 separate stores we are trying to combine; there are 2 * NOPS
14015 instructions in the peephole.
14016 Returns true iff we could generate a new instruction. */
14018 bool
14019 gen_const_stm_seq (rtx *operands, int nops)
14021 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14022 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14023 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14024 rtx mems[MAX_LDM_STM_OPS];
14025 int base_reg;
14026 rtx base_reg_rtx;
14027 HOST_WIDE_INT offset;
14028 int write_back = FALSE;
14029 int stm_case;
14030 rtx addr;
14031 bool base_reg_dies;
14032 int i, j;
14033 HARD_REG_SET allocated;
14035 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14036 mem_order, &base_reg, &offset, false);
14038 if (stm_case == 0)
14039 return false;
14041 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14043 /* If the same register is used more than once, try to find a free
14044 register. */
14045 CLEAR_HARD_REG_SET (allocated);
14046 for (i = 0; i < nops; i++)
14048 for (j = i + 1; j < nops; j++)
14049 if (regs[i] == regs[j])
14051 rtx t = peep2_find_free_register (0, nops * 2,
14052 TARGET_THUMB1 ? "l" : "r",
14053 SImode, &allocated);
14054 if (t == NULL_RTX)
14055 return false;
14056 reg_rtxs[i] = t;
14057 regs[i] = REGNO (t);
14061 /* Compute an ordering that maps the register numbers to an ascending
14062 sequence. */
14063 reg_order[0] = 0;
14064 for (i = 0; i < nops; i++)
14065 if (regs[i] < regs[reg_order[0]])
14066 reg_order[0] = i;
14068 for (i = 1; i < nops; i++)
14070 int this_order = reg_order[i - 1];
14071 for (j = 0; j < nops; j++)
14072 if (regs[j] > regs[reg_order[i - 1]]
14073 && (this_order == reg_order[i - 1]
14074 || regs[j] < regs[this_order]))
14075 this_order = j;
14076 reg_order[i] = this_order;
14079 /* Ensure that registers that must be live after the instruction end
14080 up with the correct value. */
14081 for (i = 0; i < nops; i++)
14083 int this_order = reg_order[i];
14084 if ((this_order != mem_order[i]
14085 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14086 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14087 return false;
14090 /* Load the constants. */
14091 for (i = 0; i < nops; i++)
14093 rtx op = operands[2 * nops + mem_order[i]];
14094 sorted_regs[i] = regs[reg_order[i]];
14095 emit_move_insn (reg_rtxs[reg_order[i]], op);
14098 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14100 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14101 if (TARGET_THUMB1)
14103 gcc_assert (base_reg_dies);
14104 write_back = TRUE;
14107 if (stm_case == 5)
14109 gcc_assert (base_reg_dies);
14110 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14111 offset = 0;
14114 addr = plus_constant (Pmode, base_reg_rtx, offset);
14116 for (i = 0; i < nops; i++)
14118 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14119 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14120 SImode, addr, 0);
14122 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14123 write_back ? offset + i * 4 : 0));
14124 return true;
14127 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14128 unaligned copies on processors which support unaligned semantics for those
14129 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14130 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14131 An interleave factor of 1 (the minimum) will perform no interleaving.
14132 Load/store multiple are used for aligned addresses where possible. */
14134 static void
14135 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14136 HOST_WIDE_INT length,
14137 unsigned int interleave_factor)
14139 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14140 int *regnos = XALLOCAVEC (int, interleave_factor);
14141 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14142 HOST_WIDE_INT i, j;
14143 HOST_WIDE_INT remaining = length, words;
14144 rtx halfword_tmp = NULL, byte_tmp = NULL;
14145 rtx dst, src;
14146 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14147 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14148 HOST_WIDE_INT srcoffset, dstoffset;
14149 HOST_WIDE_INT src_autoinc, dst_autoinc;
14150 rtx mem, addr;
14152 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14154 /* Use hard registers if we have aligned source or destination so we can use
14155 load/store multiple with contiguous registers. */
14156 if (dst_aligned || src_aligned)
14157 for (i = 0; i < interleave_factor; i++)
14158 regs[i] = gen_rtx_REG (SImode, i);
14159 else
14160 for (i = 0; i < interleave_factor; i++)
14161 regs[i] = gen_reg_rtx (SImode);
14163 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14164 src = copy_addr_to_reg (XEXP (srcbase, 0));
14166 srcoffset = dstoffset = 0;
14168 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14169 For copying the last bytes we want to subtract this offset again. */
14170 src_autoinc = dst_autoinc = 0;
14172 for (i = 0; i < interleave_factor; i++)
14173 regnos[i] = i;
14175 /* Copy BLOCK_SIZE_BYTES chunks. */
14177 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14179 /* Load words. */
14180 if (src_aligned && interleave_factor > 1)
14182 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14183 TRUE, srcbase, &srcoffset));
14184 src_autoinc += UNITS_PER_WORD * interleave_factor;
14186 else
14188 for (j = 0; j < interleave_factor; j++)
14190 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14191 - src_autoinc));
14192 mem = adjust_automodify_address (srcbase, SImode, addr,
14193 srcoffset + j * UNITS_PER_WORD);
14194 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14196 srcoffset += block_size_bytes;
14199 /* Store words. */
14200 if (dst_aligned && interleave_factor > 1)
14202 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14203 TRUE, dstbase, &dstoffset));
14204 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14206 else
14208 for (j = 0; j < interleave_factor; j++)
14210 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14211 - dst_autoinc));
14212 mem = adjust_automodify_address (dstbase, SImode, addr,
14213 dstoffset + j * UNITS_PER_WORD);
14214 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14216 dstoffset += block_size_bytes;
14219 remaining -= block_size_bytes;
14222 /* Copy any whole words left (note these aren't interleaved with any
14223 subsequent halfword/byte load/stores in the interests of simplicity). */
14225 words = remaining / UNITS_PER_WORD;
14227 gcc_assert (words < interleave_factor);
14229 if (src_aligned && words > 1)
14231 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14232 &srcoffset));
14233 src_autoinc += UNITS_PER_WORD * words;
14235 else
14237 for (j = 0; j < words; j++)
14239 addr = plus_constant (Pmode, src,
14240 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14241 mem = adjust_automodify_address (srcbase, SImode, addr,
14242 srcoffset + j * UNITS_PER_WORD);
14243 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14245 srcoffset += words * UNITS_PER_WORD;
14248 if (dst_aligned && words > 1)
14250 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14251 &dstoffset));
14252 dst_autoinc += words * UNITS_PER_WORD;
14254 else
14256 for (j = 0; j < words; j++)
14258 addr = plus_constant (Pmode, dst,
14259 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14260 mem = adjust_automodify_address (dstbase, SImode, addr,
14261 dstoffset + j * UNITS_PER_WORD);
14262 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14264 dstoffset += words * UNITS_PER_WORD;
14267 remaining -= words * UNITS_PER_WORD;
14269 gcc_assert (remaining < 4);
14271 /* Copy a halfword if necessary. */
14273 if (remaining >= 2)
14275 halfword_tmp = gen_reg_rtx (SImode);
14277 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14278 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14279 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14281 /* Either write out immediately, or delay until we've loaded the last
14282 byte, depending on interleave factor. */
14283 if (interleave_factor == 1)
14285 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14286 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14287 emit_insn (gen_unaligned_storehi (mem,
14288 gen_lowpart (HImode, halfword_tmp)));
14289 halfword_tmp = NULL;
14290 dstoffset += 2;
14293 remaining -= 2;
14294 srcoffset += 2;
14297 gcc_assert (remaining < 2);
14299 /* Copy last byte. */
14301 if ((remaining & 1) != 0)
14303 byte_tmp = gen_reg_rtx (SImode);
14305 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14306 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14307 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14309 if (interleave_factor == 1)
14311 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14312 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14313 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14314 byte_tmp = NULL;
14315 dstoffset++;
14318 remaining--;
14319 srcoffset++;
14322 /* Store last halfword if we haven't done so already. */
14324 if (halfword_tmp)
14326 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14327 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14328 emit_insn (gen_unaligned_storehi (mem,
14329 gen_lowpart (HImode, halfword_tmp)));
14330 dstoffset += 2;
14333 /* Likewise for last byte. */
14335 if (byte_tmp)
14337 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14338 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14339 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14340 dstoffset++;
14343 gcc_assert (remaining == 0 && srcoffset == dstoffset);
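/* As an example, a 23-byte copy with INTERLEAVE_FACTOR of 2 moves two 8-byte
   blocks in the main loop above, then one remaining word, one halfword and
   one final byte in the tail code (sizes illustrative). */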
14346 /* From mips_adjust_block_mem:
14348 Helper function for doing a loop-based block operation on memory
14349 reference MEM. Each iteration of the loop will operate on LENGTH
14350 bytes of MEM.
14352 Create a new base register for use within the loop and point it to
14353 the start of MEM. Create a new memory reference that uses this
14354 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14356 static void
14357 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14358 rtx *loop_mem)
14360 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14362 /* Although the new mem does not refer to a known location,
14363 it does keep up to LENGTH bytes of alignment. */
14364 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14365 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14368 /* From mips_block_move_loop:
14370 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14371 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14372 the memory regions do not overlap. */
14374 static void
14375 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14376 unsigned int interleave_factor,
14377 HOST_WIDE_INT bytes_per_iter)
14379 rtx label, src_reg, dest_reg, final_src, test;
14380 HOST_WIDE_INT leftover;
14382 leftover = length % bytes_per_iter;
14383 length -= leftover;
14385 /* Create registers and memory references for use within the loop. */
14386 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14387 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14389 /* Calculate the value that SRC_REG should have after the last iteration of
14390 the loop. */
14391 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14392 0, 0, OPTAB_WIDEN);
14394 /* Emit the start of the loop. */
14395 label = gen_label_rtx ();
14396 emit_label (label);
14398 /* Emit the loop body. */
14399 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14400 interleave_factor);
14402 /* Move on to the next block. */
14403 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14404 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14406 /* Emit the loop condition. */
14407 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14408 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14410 /* Mop up any left-over bytes. */
14411 if (leftover)
14412 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
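/* For instance, a 50-byte copy with BYTES_PER_ITER of 16 runs the loop three
   times and leaves 2 bytes to be mopped up by the straight copy
   (sizes illustrative). */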
14415 /* Emit a block move when either the source or destination is unaligned (not
14416 aligned to a four-byte boundary). This may need further tuning depending on
14417 core type, optimize_size setting, etc. */
14419 static int
14420 arm_movmemqi_unaligned (rtx *operands)
14422 HOST_WIDE_INT length = INTVAL (operands[2]);
14424 if (optimize_size)
14426 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14427 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14428 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14429 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14430 or dst_aligned though: allow more interleaving in those cases since the
14431 resulting code can be smaller. */
14432 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14433 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14435 if (length > 12)
14436 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14437 interleave_factor, bytes_per_iter);
14438 else
14439 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14440 interleave_factor);
14442 else
14444 /* Note that the loop created by arm_block_move_unaligned_loop may be
14445 subject to loop unrolling, which makes tuning this condition a little
14446 redundant. */
14447 if (length > 32)
14448 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14449 else
14450 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14453 return 1;
14456 int
14457 arm_gen_movmemqi (rtx *operands)
14459 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14460 HOST_WIDE_INT srcoffset, dstoffset;
14461 int i;
14462 rtx src, dst, srcbase, dstbase;
14463 rtx part_bytes_reg = NULL;
14464 rtx mem;
14466 if (!CONST_INT_P (operands[2])
14467 || !CONST_INT_P (operands[3])
14468 || INTVAL (operands[2]) > 64)
14469 return 0;
14471 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14472 return arm_movmemqi_unaligned (operands);
14474 if (INTVAL (operands[3]) & 3)
14475 return 0;
14477 dstbase = operands[0];
14478 srcbase = operands[1];
14480 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14481 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14483 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14484 out_words_to_go = INTVAL (operands[2]) / 4;
14485 last_bytes = INTVAL (operands[2]) & 3;
14486 dstoffset = srcoffset = 0;
14488 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14489 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14491 for (i = 0; in_words_to_go >= 2; i+=4)
14493 if (in_words_to_go > 4)
14494 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14495 TRUE, srcbase, &srcoffset));
14496 else
14497 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14498 src, FALSE, srcbase,
14499 &srcoffset));
14501 if (out_words_to_go)
14503 if (out_words_to_go > 4)
14504 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14505 TRUE, dstbase, &dstoffset));
14506 else if (out_words_to_go != 1)
14507 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14508 out_words_to_go, dst,
14509 (last_bytes == 0
14510 ? FALSE : TRUE),
14511 dstbase, &dstoffset));
14512 else
14514 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14515 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14516 if (last_bytes != 0)
14518 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14519 dstoffset += 4;
14524 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14525 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14528 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14529 if (out_words_to_go)
14531 rtx sreg;
14533 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14534 sreg = copy_to_reg (mem);
14536 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14537 emit_move_insn (mem, sreg);
14538 in_words_to_go--;
14540 gcc_assert (!in_words_to_go); /* Sanity check */
14543 if (in_words_to_go)
14545 gcc_assert (in_words_to_go > 0);
14547 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14548 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14551 gcc_assert (!last_bytes || part_bytes_reg);
14553 if (BYTES_BIG_ENDIAN && last_bytes)
14555 rtx tmp = gen_reg_rtx (SImode);
14557 /* The bytes we want are in the top end of the word. */
14558 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14559 GEN_INT (8 * (4 - last_bytes))));
14560 part_bytes_reg = tmp;
14562 while (last_bytes)
14564 mem = adjust_automodify_address (dstbase, QImode,
14565 plus_constant (Pmode, dst,
14566 last_bytes - 1),
14567 dstoffset + last_bytes - 1);
14568 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14570 if (--last_bytes)
14572 tmp = gen_reg_rtx (SImode);
14573 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14574 part_bytes_reg = tmp;
14579 else
14581 if (last_bytes > 1)
14583 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14584 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14585 last_bytes -= 2;
14586 if (last_bytes)
14588 rtx tmp = gen_reg_rtx (SImode);
14589 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14590 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14591 part_bytes_reg = tmp;
14592 dstoffset += 2;
14596 if (last_bytes)
14598 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14599 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14603 return 1;
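/* For example, a 14-byte copy handled here loads four words, stores the
   first three whole words, and then stores the remaining two bytes as a
   halfword taken from the last word loaded (byte counts illustrative). */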
14606 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14607 by mode size. */
14608 inline static rtx
14609 next_consecutive_mem (rtx mem)
14611 enum machine_mode mode = GET_MODE (mem);
14612 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14613 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14615 return adjust_automodify_address (mem, mode, addr, offset);
14618 /* Copy using LDRD/STRD instructions whenever possible.
14619 Returns true upon success. */
14620 bool
14621 gen_movmem_ldrd_strd (rtx *operands)
14623 unsigned HOST_WIDE_INT len;
14624 HOST_WIDE_INT align;
14625 rtx src, dst, base;
14626 rtx reg0;
14627 bool src_aligned, dst_aligned;
14628 bool src_volatile, dst_volatile;
14630 gcc_assert (CONST_INT_P (operands[2]));
14631 gcc_assert (CONST_INT_P (operands[3]));
14633 len = UINTVAL (operands[2]);
14634 if (len > 64)
14635 return false;
14637 /* Maximum alignment we can assume for both src and dst buffers. */
14638 align = INTVAL (operands[3]);
14640 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14641 return false;
14643 /* Place src and dst addresses in registers
14644 and update the corresponding mem rtx. */
14645 dst = operands[0];
14646 dst_volatile = MEM_VOLATILE_P (dst);
14647 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14648 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14649 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14651 src = operands[1];
14652 src_volatile = MEM_VOLATILE_P (src);
14653 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14654 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14655 src = adjust_automodify_address (src, VOIDmode, base, 0);
14657 if (!unaligned_access && !(src_aligned && dst_aligned))
14658 return false;
14660 if (src_volatile || dst_volatile)
14661 return false;
14663 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14664 if (!(dst_aligned || src_aligned))
14665 return arm_gen_movmemqi (operands);
14667 src = adjust_address (src, DImode, 0);
14668 dst = adjust_address (dst, DImode, 0);
14669 while (len >= 8)
14671 len -= 8;
14672 reg0 = gen_reg_rtx (DImode);
14673 if (src_aligned)
14674 emit_move_insn (reg0, src);
14675 else
14676 emit_insn (gen_unaligned_loaddi (reg0, src));
14678 if (dst_aligned)
14679 emit_move_insn (dst, reg0);
14680 else
14681 emit_insn (gen_unaligned_storedi (dst, reg0));
14683 src = next_consecutive_mem (src);
14684 dst = next_consecutive_mem (dst);
14687 gcc_assert (len < 8);
14688 if (len >= 4)
14690 /* More than a word but less than a double-word to copy. Copy a word. */
14691 reg0 = gen_reg_rtx (SImode);
14692 src = adjust_address (src, SImode, 0);
14693 dst = adjust_address (dst, SImode, 0);
14694 if (src_aligned)
14695 emit_move_insn (reg0, src);
14696 else
14697 emit_insn (gen_unaligned_loadsi (reg0, src));
14699 if (dst_aligned)
14700 emit_move_insn (dst, reg0);
14701 else
14702 emit_insn (gen_unaligned_storesi (dst, reg0));
14704 src = next_consecutive_mem (src);
14705 dst = next_consecutive_mem (dst);
14706 len -= 4;
14709 if (len == 0)
14710 return true;
14712 /* Copy the remaining bytes. */
14713 if (len >= 2)
14715 dst = adjust_address (dst, HImode, 0);
14716 src = adjust_address (src, HImode, 0);
14717 reg0 = gen_reg_rtx (SImode);
14718 if (src_aligned)
14719 emit_insn (gen_zero_extendhisi2 (reg0, src));
14720 else
14721 emit_insn (gen_unaligned_loadhiu (reg0, src));
14723 if (dst_aligned)
14724 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14725 else
14726 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14728 src = next_consecutive_mem (src);
14729 dst = next_consecutive_mem (dst);
14730 if (len == 2)
14731 return true;
14734 dst = adjust_address (dst, QImode, 0);
14735 src = adjust_address (src, QImode, 0);
14736 reg0 = gen_reg_rtx (QImode);
14737 emit_move_insn (reg0, src);
14738 emit_move_insn (dst, reg0);
14739 return true;
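/* For example, a 15-byte copy on a target with LDRD/STRD proceeds as one
   doubleword, one word, one halfword and one final byte (length illustrative). */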
14742 /* Select a dominance comparison mode if possible for a test of the general
14743 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14744 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14745 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14746 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14747 In all cases OP will be either EQ or NE, but we don't need to know which
14748 here. If we are unable to support a dominance comparison we return
14749 CC mode. This will then fail to match for the RTL expressions that
14750 generate this call. */
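/* For example, a test of the form (LT || LE), i.e. DOM_CC_X_OR_Y with cond1
   being LT and cond2 being LE, returns CC_DLEmode because LT being true
   implies LE; two EQ comparisons under DOM_CC_X_AND_Y return CC_DEQmode. */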
14751 enum machine_mode
14752 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14754 enum rtx_code cond1, cond2;
14755 int swapped = 0;
14757 /* Currently we will probably get the wrong result if the individual
14758 comparisons are not simple. This also ensures that it is safe to
14759 reverse a comparison if necessary. */
14760 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14761 != CCmode)
14762 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14763 != CCmode))
14764 return CCmode;
14766 /* The if_then_else variant of this tests the second condition if the
14767 first passes, but is true if the first fails. Reverse the first
14768 condition to get a true "inclusive-or" expression. */
14769 if (cond_or == DOM_CC_NX_OR_Y)
14770 cond1 = reverse_condition (cond1);
14772 /* If the comparisons are not equal, and one doesn't dominate the other,
14773 then we can't do this. */
14774 if (cond1 != cond2
14775 && !comparison_dominates_p (cond1, cond2)
14776 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14777 return CCmode;
14779 if (swapped)
14781 enum rtx_code temp = cond1;
14782 cond1 = cond2;
14783 cond2 = temp;
14786 switch (cond1)
14788 case EQ:
14789 if (cond_or == DOM_CC_X_AND_Y)
14790 return CC_DEQmode;
14792 switch (cond2)
14794 case EQ: return CC_DEQmode;
14795 case LE: return CC_DLEmode;
14796 case LEU: return CC_DLEUmode;
14797 case GE: return CC_DGEmode;
14798 case GEU: return CC_DGEUmode;
14799 default: gcc_unreachable ();
14802 case LT:
14803 if (cond_or == DOM_CC_X_AND_Y)
14804 return CC_DLTmode;
14806 switch (cond2)
14808 case LT:
14809 return CC_DLTmode;
14810 case LE:
14811 return CC_DLEmode;
14812 case NE:
14813 return CC_DNEmode;
14814 default:
14815 gcc_unreachable ();
14818 case GT:
14819 if (cond_or == DOM_CC_X_AND_Y)
14820 return CC_DGTmode;
14822 switch (cond2)
14824 case GT:
14825 return CC_DGTmode;
14826 case GE:
14827 return CC_DGEmode;
14828 case NE:
14829 return CC_DNEmode;
14830 default:
14831 gcc_unreachable ();
14834 case LTU:
14835 if (cond_or == DOM_CC_X_AND_Y)
14836 return CC_DLTUmode;
14838 switch (cond2)
14840 case LTU:
14841 return CC_DLTUmode;
14842 case LEU:
14843 return CC_DLEUmode;
14844 case NE:
14845 return CC_DNEmode;
14846 default:
14847 gcc_unreachable ();
14850 case GTU:
14851 if (cond_or == DOM_CC_X_AND_Y)
14852 return CC_DGTUmode;
14854 switch (cond2)
14856 case GTU:
14857 return CC_DGTUmode;
14858 case GEU:
14859 return CC_DGEUmode;
14860 case NE:
14861 return CC_DNEmode;
14862 default:
14863 gcc_unreachable ();
14866 /* The remaining cases only occur when both comparisons are the
14867 same. */
14868 case NE:
14869 gcc_assert (cond1 == cond2);
14870 return CC_DNEmode;
14872 case LE:
14873 gcc_assert (cond1 == cond2);
14874 return CC_DLEmode;
14876 case GE:
14877 gcc_assert (cond1 == cond2);
14878 return CC_DGEmode;
14880 case LEU:
14881 gcc_assert (cond1 == cond2);
14882 return CC_DLEUmode;
14884 case GEU:
14885 gcc_assert (cond1 == cond2);
14886 return CC_DGEUmode;
14888 default:
14889 gcc_unreachable ();
14893 enum machine_mode
14894 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14896 /* All floating point compares return CCFP if it is an equality
14897 comparison, and CCFPE otherwise. */
14898 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14900 switch (op)
14902 case EQ:
14903 case NE:
14904 case UNORDERED:
14905 case ORDERED:
14906 case UNLT:
14907 case UNLE:
14908 case UNGT:
14909 case UNGE:
14910 case UNEQ:
14911 case LTGT:
14912 return CCFPmode;
14914 case LT:
14915 case LE:
14916 case GT:
14917 case GE:
14918 return CCFPEmode;
14920 default:
14921 gcc_unreachable ();
14925 /* A compare with a shifted operand. Because of canonicalization, the
14926 comparison will have to be swapped when we emit the assembler. */
14927 if (GET_MODE (y) == SImode
14928 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14929 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14930 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14931 || GET_CODE (x) == ROTATERT))
14932 return CC_SWPmode;
14934 /* This operation is performed swapped, but since we only rely on the Z
14935 flag we don't need an additional mode. */
14936 if (GET_MODE (y) == SImode
14937 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14938 && GET_CODE (x) == NEG
14939 && (op == EQ || op == NE))
14940 return CC_Zmode;
14942 /* This is a special case that is used by combine to allow a
14943 comparison of a shifted byte load to be split into a zero-extend
14944 followed by a comparison of the shifted integer (only valid for
14945 equalities and unsigned inequalities). */
14946 if (GET_MODE (x) == SImode
14947 && GET_CODE (x) == ASHIFT
14948 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14949 && GET_CODE (XEXP (x, 0)) == SUBREG
14950 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14951 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14952 && (op == EQ || op == NE
14953 || op == GEU || op == GTU || op == LTU || op == LEU)
14954 && CONST_INT_P (y))
14955 return CC_Zmode;
14957 /* A construct for a conditional compare, if the false arm contains
14958 0, then both conditions must be true, otherwise either condition
14959 must be true. Not all conditions are possible, so CCmode is
14960 returned if it can't be done. */
14961 if (GET_CODE (x) == IF_THEN_ELSE
14962 && (XEXP (x, 2) == const0_rtx
14963 || XEXP (x, 2) == const1_rtx)
14964 && COMPARISON_P (XEXP (x, 0))
14965 && COMPARISON_P (XEXP (x, 1)))
14966 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14967 INTVAL (XEXP (x, 2)));
14969 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14970 if (GET_CODE (x) == AND
14971 && (op == EQ || op == NE)
14972 && COMPARISON_P (XEXP (x, 0))
14973 && COMPARISON_P (XEXP (x, 1)))
14974 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14975 DOM_CC_X_AND_Y);
14977 if (GET_CODE (x) == IOR
14978 && (op == EQ || op == NE)
14979 && COMPARISON_P (XEXP (x, 0))
14980 && COMPARISON_P (XEXP (x, 1)))
14981 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14982 DOM_CC_X_OR_Y);
14984 /* An operation (on Thumb) where we want to test for a single bit.
14985 This is done by shifting that bit up into the top bit of a
14986 scratch register; we can then branch on the sign bit. */
14987 if (TARGET_THUMB1
14988 && GET_MODE (x) == SImode
14989 && (op == EQ || op == NE)
14990 && GET_CODE (x) == ZERO_EXTRACT
14991 && XEXP (x, 1) == const1_rtx)
14992 return CC_Nmode;
14994 /* An operation that sets the condition codes as a side-effect, the
14995 V flag is not set correctly, so we can only use comparisons where
14996 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14997 instead.) */
14998 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14999 if (GET_MODE (x) == SImode
15000 && y == const0_rtx
15001 && (op == EQ || op == NE || op == LT || op == GE)
15002 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15003 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15004 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15005 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15006 || GET_CODE (x) == LSHIFTRT
15007 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15008 || GET_CODE (x) == ROTATERT
15009 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15010 return CC_NOOVmode;
15012 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15013 return CC_Zmode;
15015 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15016 && GET_CODE (x) == PLUS
15017 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15018 return CC_Cmode;
15020 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15022 switch (op)
15024 case EQ:
15025 case NE:
15026 /* A DImode comparison against zero can be implemented by
15027 or'ing the two halves together. */
15028 if (y == const0_rtx)
15029 return CC_Zmode;
15031 /* We can do an equality test in three Thumb instructions. */
15032 if (!TARGET_32BIT)
15033 return CC_Zmode;
15035 /* FALLTHROUGH */
15037 case LTU:
15038 case LEU:
15039 case GTU:
15040 case GEU:
15041 /* DImode unsigned comparisons can be implemented by cmp +
15042 cmpeq without a scratch register. Not worth doing in
15043 Thumb-2. */
15044 if (TARGET_32BIT)
15045 return CC_CZmode;
15047 /* FALLTHROUGH */
15049 case LT:
15050 case LE:
15051 case GT:
15052 case GE:
15053 /* DImode signed and unsigned comparisons can be implemented
15054 by cmp + sbcs with a scratch register, but that does not
15055 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15056 gcc_assert (op != EQ && op != NE);
15057 return CC_NCVmode;
15059 default:
15060 gcc_unreachable ();
15064 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15065 return GET_MODE (x);
15067 return CCmode;
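/* For instance, the canonical unsigned overflow check "(a + b) < b" (with
   unsigned operands, names illustrative) matches the PLUS case above and
   selects CC_Cmode, so only the carry flag of the addition is examined. */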
15070 /* X and Y are two things to compare using CODE. Emit the compare insn and
15071 return the rtx for register 0 in the proper mode. FP means this is a
15072 floating point compare: I don't think that it is needed on the arm. */
15073 rtx
15074 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15076 enum machine_mode mode;
15077 rtx cc_reg;
15078 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15080 /* We might have X as a constant, Y as a register because of the predicates
15081 used for cmpdi. If so, force X to a register here. */
15082 if (dimode_comparison && !REG_P (x))
15083 x = force_reg (DImode, x);
15085 mode = SELECT_CC_MODE (code, x, y);
15086 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15088 if (dimode_comparison
15089 && mode != CC_CZmode)
15091 rtx clobber, set;
15093 /* To compare two non-zero values for equality, XOR them and
15094 then compare against zero. Not used for ARM mode; there
15095 CC_CZmode is cheaper. */
15096 if (mode == CC_Zmode && y != const0_rtx)
15098 gcc_assert (!reload_completed);
15099 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15100 y = const0_rtx;
15103 /* A scratch register is required. */
15104 if (reload_completed)
15105 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15106 else
15107 scratch = gen_rtx_SCRATCH (SImode);
15109 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15110 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15111 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15113 else
15114 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15116 return cc_reg;
15119 /* Generate a sequence of insns that will generate the correct return
15120 address mask depending on the physical architecture that the program
15121 is running on. */
15122 rtx
15123 arm_gen_return_addr_mask (void)
15125 rtx reg = gen_reg_rtx (Pmode);
15127 emit_insn (gen_return_addr_mask (reg));
15128 return reg;
15131 void
15132 arm_reload_in_hi (rtx *operands)
15134 rtx ref = operands[1];
15135 rtx base, scratch;
15136 HOST_WIDE_INT offset = 0;
15138 if (GET_CODE (ref) == SUBREG)
15140 offset = SUBREG_BYTE (ref);
15141 ref = SUBREG_REG (ref);
15144 if (REG_P (ref))
15146 /* We have a pseudo which has been spilt onto the stack; there
15147 are two cases here: the first where there is a simple
15148 stack-slot replacement and a second where the stack-slot is
15149 out of range, or is used as a subreg. */
15150 if (reg_equiv_mem (REGNO (ref)))
15152 ref = reg_equiv_mem (REGNO (ref));
15153 base = find_replacement (&XEXP (ref, 0));
15155 else
15156 /* The slot is out of range, or was dressed up in a SUBREG. */
15157 base = reg_equiv_address (REGNO (ref));
15159 else
15160 base = find_replacement (&XEXP (ref, 0));
15162 /* Handle the case where the address is too complex to be offset by 1. */
15163 if (GET_CODE (base) == MINUS
15164 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15166 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15168 emit_set_insn (base_plus, base);
15169 base = base_plus;
15171 else if (GET_CODE (base) == PLUS)
15173 /* The addend must be CONST_INT, or we would have dealt with it above. */
15174 HOST_WIDE_INT hi, lo;
15176 offset += INTVAL (XEXP (base, 1));
15177 base = XEXP (base, 0);
15179 /* Rework the address into a legal sequence of insns. */
15180 /* Valid range for lo is -4095 -> 4095 */
15181 lo = (offset >= 0
15182 ? (offset & 0xfff)
15183 : -((-offset) & 0xfff));
15185 /* Corner case, if lo is the max offset then we would be out of range
15186 once we have added the additional 1 below, so bump the msb into the
15187 pre-loading insn(s). */
15188 if (lo == 4095)
15189 lo &= 0x7ff;
15191 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15192 ^ (HOST_WIDE_INT) 0x80000000)
15193 - (HOST_WIDE_INT) 0x80000000);
15195 gcc_assert (hi + lo == offset);
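/* For example, an incoming offset of 0x1234 splits into lo = 0x234 and
   hi = 0x1000, so the base is advanced by 0x1000 first and the byte loads
   below use offsets 0x234 and 0x235 (values illustrative). */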
15197 if (hi != 0)
15199 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15201 /* Get the base address; addsi3 knows how to handle constants
15202 that require more than one insn. */
15203 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15204 base = base_plus;
15205 offset = lo;
15209 /* Operands[2] may overlap operands[0] (though it won't overlap
15210 operands[1]), that's why we asked for a DImode reg -- so we can
15211 use the bit that does not overlap. */
15212 if (REGNO (operands[2]) == REGNO (operands[0]))
15213 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15214 else
15215 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15217 emit_insn (gen_zero_extendqisi2 (scratch,
15218 gen_rtx_MEM (QImode,
15219 plus_constant (Pmode, base,
15220 offset))));
15221 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15222 gen_rtx_MEM (QImode,
15223 plus_constant (Pmode, base,
15224 offset + 1))));
15225 if (!BYTES_BIG_ENDIAN)
15226 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15227 gen_rtx_IOR (SImode,
15228 gen_rtx_ASHIFT
15229 (SImode,
15230 gen_rtx_SUBREG (SImode, operands[0], 0),
15231 GEN_INT (8)),
15232 scratch));
15233 else
15234 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15235 gen_rtx_IOR (SImode,
15236 gen_rtx_ASHIFT (SImode, scratch,
15237 GEN_INT (8)),
15238 gen_rtx_SUBREG (SImode, operands[0], 0)));
15241 /* Handle storing a half-word to memory during reload by synthesizing as two
15242 byte stores. Take care not to clobber the input values until after we
15243 have moved them somewhere safe. This code assumes that if the DImode
15244 scratch in operands[2] overlaps either the input value or output address
15245 in some way, then that value must die in this insn (we absolutely need
15246 two scratch registers for some corner cases). */
15247 void
15248 arm_reload_out_hi (rtx *operands)
15250 rtx ref = operands[0];
15251 rtx outval = operands[1];
15252 rtx base, scratch;
15253 HOST_WIDE_INT offset = 0;
15255 if (GET_CODE (ref) == SUBREG)
15257 offset = SUBREG_BYTE (ref);
15258 ref = SUBREG_REG (ref);
15261 if (REG_P (ref))
15263 /* We have a pseudo which has been spilt onto the stack; there
15264 are two cases here: the first where there is a simple
15265 stack-slot replacement and a second where the stack-slot is
15266 out of range, or is used as a subreg. */
15267 if (reg_equiv_mem (REGNO (ref)))
15269 ref = reg_equiv_mem (REGNO (ref));
15270 base = find_replacement (&XEXP (ref, 0));
15272 else
15273 /* The slot is out of range, or was dressed up in a SUBREG. */
15274 base = reg_equiv_address (REGNO (ref));
15276 else
15277 base = find_replacement (&XEXP (ref, 0));
15279 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15281 /* Handle the case where the address is too complex to be offset by 1. */
15282 if (GET_CODE (base) == MINUS
15283 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15285 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15287 /* Be careful not to destroy OUTVAL. */
15288 if (reg_overlap_mentioned_p (base_plus, outval))
15290 /* Updating base_plus might destroy outval, see if we can
15291 swap the scratch and base_plus. */
15292 if (!reg_overlap_mentioned_p (scratch, outval))
15294 rtx tmp = scratch;
15295 scratch = base_plus;
15296 base_plus = tmp;
15298 else
15300 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15302 /* Be conservative and copy OUTVAL into the scratch now,
15303 this should only be necessary if outval is a subreg
15304 of something larger than a word. */
15305 /* XXX Might this clobber base? I can't see how it can,
15306 since scratch is known to overlap with OUTVAL, and
15307 must be wider than a word. */
15308 emit_insn (gen_movhi (scratch_hi, outval));
15309 outval = scratch_hi;
15313 emit_set_insn (base_plus, base);
15314 base = base_plus;
15316 else if (GET_CODE (base) == PLUS)
15318 /* The addend must be CONST_INT, or we would have dealt with it above. */
15319 HOST_WIDE_INT hi, lo;
15321 offset += INTVAL (XEXP (base, 1));
15322 base = XEXP (base, 0);
15324 /* Rework the address into a legal sequence of insns. */
15325 /* Valid range for lo is -4095 -> 4095 */
15326 lo = (offset >= 0
15327 ? (offset & 0xfff)
15328 : -((-offset) & 0xfff));
15330 /* Corner case, if lo is the max offset then we would be out of range
15331 once we have added the additional 1 below, so bump the msb into the
15332 pre-loading insn(s). */
15333 if (lo == 4095)
15334 lo &= 0x7ff;
15336 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15337 ^ (HOST_WIDE_INT) 0x80000000)
15338 - (HOST_WIDE_INT) 0x80000000);
15340 gcc_assert (hi + lo == offset);
15342 if (hi != 0)
15344 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15346 /* Be careful not to destroy OUTVAL. */
15347 if (reg_overlap_mentioned_p (base_plus, outval))
15349 /* Updating base_plus might destroy outval, see if we
15350 can swap the scratch and base_plus. */
15351 if (!reg_overlap_mentioned_p (scratch, outval))
15353 rtx tmp = scratch;
15354 scratch = base_plus;
15355 base_plus = tmp;
15357 else
15359 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15361 /* Be conservative and copy outval into scratch now,
15362 this should only be necessary if outval is a
15363 subreg of something larger than a word. */
15364 /* XXX Might this clobber base? I can't see how it
15365 can, since scratch is known to overlap with
15366 outval. */
15367 emit_insn (gen_movhi (scratch_hi, outval));
15368 outval = scratch_hi;
15372 /* Get the base address; addsi3 knows how to handle constants
15373 that require more than one insn. */
15374 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15375 base = base_plus;
15376 offset = lo;
15380 if (BYTES_BIG_ENDIAN)
15382 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15383 plus_constant (Pmode, base,
15384 offset + 1)),
15385 gen_lowpart (QImode, outval)));
15386 emit_insn (gen_lshrsi3 (scratch,
15387 gen_rtx_SUBREG (SImode, outval, 0),
15388 GEN_INT (8)));
15389 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15390 offset)),
15391 gen_lowpart (QImode, scratch)));
15393 else
15395 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15396 offset)),
15397 gen_lowpart (QImode, outval)));
15398 emit_insn (gen_lshrsi3 (scratch,
15399 gen_rtx_SUBREG (SImode, outval, 0),
15400 GEN_INT (8)));
15401 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15402 plus_constant (Pmode, base,
15403 offset + 1)),
15404 gen_lowpart (QImode, scratch)));
15408 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15409 (padded to the size of a word) should be passed in a register. */
15411 static bool
15412 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15414 if (TARGET_AAPCS_BASED)
15415 return must_pass_in_stack_var_size (mode, type);
15416 else
15417 return must_pass_in_stack_var_size_or_pad (mode, type);
15421 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15422 Return true if an argument passed on the stack should be padded upwards,
15423 i.e. if the least-significant byte has useful data.
15424 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15425 aggregate types are placed in the lowest memory address. */
15427 bool
15428 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15430 if (!TARGET_AAPCS_BASED)
15431 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15433 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15434 return false;
15436 return true;
15440 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15441 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15442 register has useful data, and return the opposite if the most
15443 significant byte does. */
15445 bool
15446 arm_pad_reg_upward (enum machine_mode mode,
15447 tree type, int first ATTRIBUTE_UNUSED)
15449 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15451 /* For AAPCS, small aggregates, small fixed-point types,
15452 and small complex types are always padded upwards. */
15453 if (type)
15455 if ((AGGREGATE_TYPE_P (type)
15456 || TREE_CODE (type) == COMPLEX_TYPE
15457 || FIXED_POINT_TYPE_P (type))
15458 && int_size_in_bytes (type) <= 4)
15459 return true;
15461 else
15463 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15464 && GET_MODE_SIZE (mode) <= 4)
15465 return true;
15469 /* Otherwise, use default padding. */
15470 return !BYTES_BIG_ENDIAN;
15473 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15474 assuming that the address in the base register is word aligned. */
15475 bool
15476 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15478 HOST_WIDE_INT max_offset;
15480 /* Offset must be a multiple of 4 in Thumb-2 mode. */
15481 if (TARGET_THUMB2 && ((offset & 3) != 0))
15482 return false;
15484 if (TARGET_THUMB2)
15485 max_offset = 1020;
15486 else if (TARGET_ARM)
15487 max_offset = 255;
15488 else
15489 return false;
15491 return ((offset <= max_offset) && (offset >= -max_offset));
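/* Illustrative example (editorial note): in ARM state an offset of 255 is
   accepted and 256 is rejected; in Thumb-2 state 1020 is accepted while
   1022 is rejected because it is not a multiple of 4.  */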
15494 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15495 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15496 Assumes that the address in the base register RN is word aligned. Pattern
15497 guarantees that both memory accesses use the same base register,
15498 the offsets are constants within range, and the gap between the offsets is 4.
15499 If reload is complete, check that the registers are legal. WBACK indicates whether
15500 address is updated. LOAD indicates whether memory access is load or store. */
15501 bool
15502 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15503 bool wback, bool load)
15505 unsigned int t, t2, n;
15507 if (!reload_completed)
15508 return true;
15510 if (!offset_ok_for_ldrd_strd (offset))
15511 return false;
15513 t = REGNO (rt);
15514 t2 = REGNO (rt2);
15515 n = REGNO (rn);
15517 if ((TARGET_THUMB2)
15518 && ((wback && (n == t || n == t2))
15519 || (t == SP_REGNUM)
15520 || (t == PC_REGNUM)
15521 || (t2 == SP_REGNUM)
15522 || (t2 == PC_REGNUM)
15523 || (!load && (n == PC_REGNUM))
15524 || (load && (t == t2))
15525 /* Triggers the Cortex-M3 LDRD erratum. */
15526 || (!wback && load && fix_cm3_ldrd && (n == t))))
15527 return false;
15529 if ((TARGET_ARM)
15530 && ((wback && (n == t || n == t2))
15531 || (t2 == PC_REGNUM)
15532 || (t % 2 != 0) /* First destination register is not even. */
15533 || (t2 != t + 1)
15534 /* PC can be used as base register (for offset addressing only),
15535 but it is deprecated. */
15536 || (n == PC_REGNUM)))
15537 return false;
15539 return true;
15542 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15543 operand MEM's address contains an immediate offset from the base
15544 register and has no side effects, in which case it sets BASE and
15545 OFFSET accordingly. */
15546 static bool
15547 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15549 rtx addr;
15551 gcc_assert (base != NULL && offset != NULL);
15553 /* TODO: Handle more general memory operand patterns, such as
15554 PRE_DEC and PRE_INC. */
15556 if (side_effects_p (mem))
15557 return false;
15559 /* Can't deal with subregs. */
15560 if (GET_CODE (mem) == SUBREG)
15561 return false;
15563 gcc_assert (MEM_P (mem));
15565 *offset = const0_rtx;
15567 addr = XEXP (mem, 0);
15569 /* If addr isn't valid for DImode, then we can't handle it. */
15570 if (!arm_legitimate_address_p (DImode, addr,
15571 reload_in_progress || reload_completed))
15572 return false;
15574 if (REG_P (addr))
15576 *base = addr;
15577 return true;
15579 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15581 *base = XEXP (addr, 0);
15582 *offset = XEXP (addr, 1);
15583 return (REG_P (*base) && CONST_INT_P (*offset));
15586 return false;
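/* Illustrative example (editorial note): an address such as
   (plus (reg r4) (const_int 8)) yields BASE = r4 and OFFSET = 8, whereas
   auto-modify forms such as (post_inc (reg r4)) are rejected by the
   side_effects_p check above.  */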
15589 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15591 /* Called from a peephole2 to replace two word-size accesses with a
15592 single LDRD/STRD instruction. Returns true iff we can generate a
15593 new instruction sequence. That is, both accesses use the same base
15594 register and the gap between constant offsets is 4. This function
15595 may reorder its operands to match ldrd/strd RTL templates.
15596 OPERANDS are the operands found by the peephole matcher;
15597 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15598 corresponding memory operands. LOAD indicates whether the access
15599 is load or store. CONST_STORE indicates a store of constant
15600 integer values held in OPERANDS[4,5] and assumes that the pattern
15601 is 4 insns long, for the purpose of checking dead registers.
15602 COMMUTE indicates that register operands may be reordered. */
15603 bool
15604 gen_operands_ldrd_strd (rtx *operands, bool load,
15605 bool const_store, bool commute)
15607 int nops = 2;
15608 HOST_WIDE_INT offsets[2], offset;
15609 rtx base = NULL_RTX;
15610 rtx cur_base, cur_offset, tmp;
15611 int i, gap;
15612 HARD_REG_SET regset;
15614 gcc_assert (!const_store || !load);
15615 /* Check that the memory references are immediate offsets from the
15616 same base register. Extract the base register, the destination
15617 registers, and the corresponding memory offsets. */
15618 for (i = 0; i < nops; i++)
15620 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15621 return false;
15623 if (i == 0)
15624 base = cur_base;
15625 else if (REGNO (base) != REGNO (cur_base))
15626 return false;
15628 offsets[i] = INTVAL (cur_offset);
15629 if (GET_CODE (operands[i]) == SUBREG)
15631 tmp = SUBREG_REG (operands[i]);
15632 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15633 operands[i] = tmp;
15637 /* Make sure there is no dependency between the individual loads. */
15638 if (load && REGNO (operands[0]) == REGNO (base))
15639 return false; /* RAW */
15641 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15642 return false; /* WAW */
15644 /* If the same input register is used in both stores
15645 when storing different constants, try to find a free register.
15646 For example, the code
15647 mov r0, 0
15648 str r0, [r2]
15649 mov r0, 1
15650 str r0, [r2, #4]
15651 can be transformed into
15652 mov r1, 0
15653 strd r1, r0, [r2]
15654 in Thumb mode assuming that r1 is free. */
15655 if (const_store
15656 && REGNO (operands[0]) == REGNO (operands[1])
15657 && INTVAL (operands[4]) != INTVAL (operands[5]))
15659 if (TARGET_THUMB2)
15661 CLEAR_HARD_REG_SET (regset);
15662 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15663 if (tmp == NULL_RTX)
15664 return false;
15666 /* Use the new register in the first load to ensure that
15667 if the original input register is not dead after peephole,
15668 then it will have the correct constant value. */
15669 operands[0] = tmp;
15671 else if (TARGET_ARM)
15673 return false;
15674 int regno = REGNO (operands[0]);
15675 if (!peep2_reg_dead_p (4, operands[0]))
15677 /* When the input register is even and is not dead after the
15678 pattern, it has to hold the second constant but we cannot
15679 form a legal STRD in ARM mode with this register as the second
15680 register. */
15681 if (regno % 2 == 0)
15682 return false;
15684 /* Is regno-1 free? */
15685 SET_HARD_REG_SET (regset);
15686 CLEAR_HARD_REG_BIT(regset, regno - 1);
15687 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15688 if (tmp == NULL_RTX)
15689 return false;
15691 operands[0] = tmp;
15693 else
15695 /* Find a DImode register. */
15696 CLEAR_HARD_REG_SET (regset);
15697 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15698 if (tmp != NULL_RTX)
15700 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15701 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15703 else
15705 /* Can we use the input register to form a DI register? */
15706 SET_HARD_REG_SET (regset);
15707 CLEAR_HARD_REG_BIT(regset,
15708 regno % 2 == 0 ? regno + 1 : regno - 1);
15709 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15710 if (tmp == NULL_RTX)
15711 return false;
15712 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15716 gcc_assert (operands[0] != NULL_RTX);
15717 gcc_assert (operands[1] != NULL_RTX);
15718 gcc_assert (REGNO (operands[0]) % 2 == 0);
15719 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15723 /* Make sure the instructions are ordered with lower memory access first. */
15724 if (offsets[0] > offsets[1])
15726 gap = offsets[0] - offsets[1];
15727 offset = offsets[1];
15729 /* Swap the instructions such that lower memory is accessed first. */
15730 SWAP_RTX (operands[0], operands[1]);
15731 SWAP_RTX (operands[2], operands[3]);
15732 if (const_store)
15733 SWAP_RTX (operands[4], operands[5]);
15735 else
15737 gap = offsets[1] - offsets[0];
15738 offset = offsets[0];
15741 /* Make sure accesses are to consecutive memory locations. */
15742 if (gap != 4)
15743 return false;
15745 /* Make sure we generate legal instructions. */
15746 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15747 false, load))
15748 return true;
15750 /* In Thumb state, where registers are almost unconstrained, there
15751 is little hope of fixing it. */
15752 if (TARGET_THUMB2)
15753 return false;
15755 if (load && commute)
15757 /* Try reordering registers. */
15758 SWAP_RTX (operands[0], operands[1]);
15759 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15760 false, load))
15761 return true;
15764 if (const_store)
15766 /* If input registers are dead after this pattern, they can be
15767 reordered or replaced by other registers that are free in the
15768 current pattern. */
15769 if (!peep2_reg_dead_p (4, operands[0])
15770 || !peep2_reg_dead_p (4, operands[1]))
15771 return false;
15773 /* Try to reorder the input registers. */
15774 /* For example, the code
15775 mov r0, 0
15776 mov r1, 1
15777 str r1, [r2]
15778 str r0, [r2, #4]
15779 can be transformed into
15780 mov r1, 0
15781 mov r0, 1
15782 strd r0, [r2]
15784 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15785 false, false))
15787 SWAP_RTX (operands[0], operands[1]);
15788 return true;
15791 /* Try to find a free DI register. */
15792 CLEAR_HARD_REG_SET (regset);
15793 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15794 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15795 while (true)
15797 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15798 if (tmp == NULL_RTX)
15799 return false;
15801 /* DREG must be an even-numbered register in DImode.
15802 Split it into SI registers. */
15803 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15804 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15805 gcc_assert (operands[0] != NULL_RTX);
15806 gcc_assert (operands[1] != NULL_RTX);
15807 gcc_assert (REGNO (operands[0]) % 2 == 0);
15808 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15810 return (operands_ok_ldrd_strd (operands[0], operands[1],
15811 base, offset,
15812 false, load));
15816 return false;
15818 #undef SWAP_RTX
15823 /* Print a symbolic form of X to the debug file, F. */
15824 static void
15825 arm_print_value (FILE *f, rtx x)
15827 switch (GET_CODE (x))
15829 case CONST_INT:
15830 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15831 return;
15833 case CONST_DOUBLE:
15834 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15835 return;
15837 case CONST_VECTOR:
15839 int i;
15841 fprintf (f, "<");
15842 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15844 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15845 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15846 fputc (',', f);
15848 fprintf (f, ">");
15850 return;
15852 case CONST_STRING:
15853 fprintf (f, "\"%s\"", XSTR (x, 0));
15854 return;
15856 case SYMBOL_REF:
15857 fprintf (f, "`%s'", XSTR (x, 0));
15858 return;
15860 case LABEL_REF:
15861 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15862 return;
15864 case CONST:
15865 arm_print_value (f, XEXP (x, 0));
15866 return;
15868 case PLUS:
15869 arm_print_value (f, XEXP (x, 0));
15870 fprintf (f, "+");
15871 arm_print_value (f, XEXP (x, 1));
15872 return;
15874 case PC:
15875 fprintf (f, "pc");
15876 return;
15878 default:
15879 fprintf (f, "????");
15880 return;
15884 /* Routines for manipulation of the constant pool. */
15886 /* Arm instructions cannot load a large constant directly into a
15887 register; they have to come from a pc relative load. The constant
15888 must therefore be placed in the addressable range of the pc
15889 relative load. Depending on the precise pc relative load
15890 instruction the range is somewhere between 256 bytes and 4k. This
15891 means that we often have to dump a constant inside a function, and
15892 generate code to branch around it.
15894 It is important to minimize this, since the branches will slow
15895 things down and make the code larger.
15897 Normally we can hide the table after an existing unconditional
15898 branch so that there is no interruption of the flow, but in the
15899 worst case the code looks like this:
15901 ldr rn, L1
15903 b L2
15904 align
15905 L1: .long value
15909 ldr rn, L3
15911 b L4
15912 align
15913 L3: .long value
15917 We fix this by performing a scan after scheduling, which notices
15918 which instructions need to have their operands fetched from the
15919 constant table and builds the table.
15921 The algorithm starts by building a table of all the constants that
15922 need fixing up and all the natural barriers in the function (places
15923 where a constant table can be dropped without breaking the flow).
15924 For each fixup we note how far the pc-relative replacement will be
15925 able to reach and the offset of the instruction into the function.
15927 Having built the table we then group the fixes together to form
15928 tables that are as large as possible (subject to addressing
15929 constraints) and emit each table of constants after the last
15930 barrier that is within range of all the instructions in the group.
15931 If a group does not contain a barrier, then we forcibly create one
15932 by inserting a jump instruction into the flow. Once the table has
15933 been inserted, the insns are then modified to reference the
15934 relevant entry in the pool.
15936 Possible enhancements to the algorithm (not implemented) are:
15938 1) For some processors and object formats, there may be benefit in
15939 aligning the pools to the start of cache lines; this alignment
15940 would need to be taken into account when calculating addressability
15941 of a pool. */
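/* Illustrative sketch (editorial note): when a group of fixes has no
   natural barrier in range, create_fix_barrier below forces one, so the
   emitted assembly looks roughly like

	b	.Lskip		@ jump around the new pool
	.align	2
   .Lpool:
	.word	constant
   .Lskip:

   and each fixed-up insn becomes a pc-relative load from its entry in
   .Lpool (the label names here are purely illustrative).  */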
15943 /* These typedefs are located at the start of this file, so that
15944 they can be used in the prototypes there. This comment is to
15945 remind readers of that fact so that the following structures
15946 can be understood more easily.
15948 typedef struct minipool_node Mnode;
15949 typedef struct minipool_fixup Mfix; */
15951 struct minipool_node
15953 /* Doubly linked chain of entries. */
15954 Mnode * next;
15955 Mnode * prev;
15956 /* The maximum offset into the code at which this entry can be placed. While
15957 pushing fixes for forward references, all entries are sorted in order
15958 of increasing max_address. */
15959 HOST_WIDE_INT max_address;
15960 /* Similarly for an entry inserted for a backwards ref. */
15961 HOST_WIDE_INT min_address;
15962 /* The number of fixes referencing this entry. This can become zero
15963 if we "unpush" an entry. In this case we ignore the entry when we
15964 come to emit the code. */
15965 int refcount;
15966 /* The offset from the start of the minipool. */
15967 HOST_WIDE_INT offset;
15968 /* The value in the table. */
15969 rtx value;
15970 /* The mode of value. */
15971 enum machine_mode mode;
15972 /* The size of the value. With iWMMXt enabled
15973 sizes > 4 also imply an alignment of 8 bytes. */
15974 int fix_size;
15977 struct minipool_fixup
15979 Mfix * next;
15980 rtx insn;
15981 HOST_WIDE_INT address;
15982 rtx * loc;
15983 enum machine_mode mode;
15984 int fix_size;
15985 rtx value;
15986 Mnode * minipool;
15987 HOST_WIDE_INT forwards;
15988 HOST_WIDE_INT backwards;
15991 /* Fixes less than a word need padding out to a word boundary. */
15992 #define MINIPOOL_FIX_SIZE(mode) \
15993 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
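/* Illustrative example (editorial note): a QImode or HImode fix therefore
   occupies a full 4-byte slot in the pool, while DImode and 16-byte vector
   fixes keep their natural sizes.  */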
15995 static Mnode * minipool_vector_head;
15996 static Mnode * minipool_vector_tail;
15997 static rtx minipool_vector_label;
15998 static int minipool_pad;
16000 /* The linked list of all minipool fixes required for this function. */
16001 Mfix * minipool_fix_head;
16002 Mfix * minipool_fix_tail;
16003 /* The fix entry for the current minipool, once it has been placed. */
16004 Mfix * minipool_barrier;
16006 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16007 #define JUMP_TABLES_IN_TEXT_SECTION 0
16008 #endif
16010 static HOST_WIDE_INT
16011 get_jump_table_size (rtx insn)
16013 /* ADDR_VECs only take room if read-only data goes into the text
16014 section. */
16015 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16017 rtx body = PATTERN (insn);
16018 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16019 HOST_WIDE_INT size;
16020 HOST_WIDE_INT modesize;
16022 modesize = GET_MODE_SIZE (GET_MODE (body));
16023 size = modesize * XVECLEN (body, elt);
16024 switch (modesize)
16026 case 1:
16027 /* Round up size of TBB table to a halfword boundary. */
16028 size = (size + 1) & ~(HOST_WIDE_INT)1;
16029 break;
16030 case 2:
16031 /* No padding necessary for TBH. */
16032 break;
16033 case 4:
16034 /* Add two bytes for alignment on Thumb. */
16035 if (TARGET_THUMB)
16036 size += 2;
16037 break;
16038 default:
16039 gcc_unreachable ();
16041 return size;
16044 return 0;
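/* Illustrative example (editorial note): a TBB table (modesize 1) with 7
   entries takes 7 bytes of data and is rounded up to 8; a 3-entry
   word-sized table on Thumb is 3 * 4 + 2 = 14 bytes once the alignment
   bytes added above are counted.  */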
16047 /* Return the maximum amount of padding that will be inserted before
16048 label LABEL. */
16050 static HOST_WIDE_INT
16051 get_label_padding (rtx label)
16053 HOST_WIDE_INT align, min_insn_size;
16055 align = 1 << label_to_alignment (label);
16056 min_insn_size = TARGET_THUMB ? 2 : 4;
16057 return align > min_insn_size ? align - min_insn_size : 0;
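/* Illustrative example (editorial note): for a label aligned to 8 bytes
   the worst case is 8 - 2 = 6 bytes of padding in Thumb state and
   8 - 4 = 4 bytes in ARM state.  */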
16060 /* Move a minipool fix MP from its current location to before MAX_MP.
16061 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16062 constraints may need updating. */
16063 static Mnode *
16064 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16065 HOST_WIDE_INT max_address)
16067 /* The code below assumes these are different. */
16068 gcc_assert (mp != max_mp);
16070 if (max_mp == NULL)
16072 if (max_address < mp->max_address)
16073 mp->max_address = max_address;
16075 else
16077 if (max_address > max_mp->max_address - mp->fix_size)
16078 mp->max_address = max_mp->max_address - mp->fix_size;
16079 else
16080 mp->max_address = max_address;
16082 /* Unlink MP from its current position. Since max_mp is non-null,
16083 mp->prev must be non-null. */
16084 mp->prev->next = mp->next;
16085 if (mp->next != NULL)
16086 mp->next->prev = mp->prev;
16087 else
16088 minipool_vector_tail = mp->prev;
16090 /* Re-insert it before MAX_MP. */
16091 mp->next = max_mp;
16092 mp->prev = max_mp->prev;
16093 max_mp->prev = mp;
16095 if (mp->prev != NULL)
16096 mp->prev->next = mp;
16097 else
16098 minipool_vector_head = mp;
16101 /* Save the new entry. */
16102 max_mp = mp;
16104 /* Scan over the preceding entries and adjust their addresses as
16105 required. */
16106 while (mp->prev != NULL
16107 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16109 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16110 mp = mp->prev;
16113 return max_mp;
16116 /* Add a constant to the minipool for a forward reference. Returns the
16117 node added or NULL if the constant will not fit in this pool. */
16118 static Mnode *
16119 add_minipool_forward_ref (Mfix *fix)
16121 /* If set, max_mp is the first pool_entry that has a lower
16122 constraint than the one we are trying to add. */
16123 Mnode * max_mp = NULL;
16124 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16125 Mnode * mp;
16127 /* If the minipool starts before the end of FIX->INSN then this FIX
16128 can not be placed into the current pool. Furthermore, adding the
16129 new constant pool entry may cause the pool to start FIX_SIZE bytes
16130 earlier. */
16131 if (minipool_vector_head &&
16132 (fix->address + get_attr_length (fix->insn)
16133 >= minipool_vector_head->max_address - fix->fix_size))
16134 return NULL;
16136 /* Scan the pool to see if a constant with the same value has
16137 already been added. While we are doing this, also note the
16138 location where we must insert the constant if it doesn't already
16139 exist. */
16140 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16142 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16143 && fix->mode == mp->mode
16144 && (!LABEL_P (fix->value)
16145 || (CODE_LABEL_NUMBER (fix->value)
16146 == CODE_LABEL_NUMBER (mp->value)))
16147 && rtx_equal_p (fix->value, mp->value))
16149 /* More than one fix references this entry. */
16150 mp->refcount++;
16151 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16154 /* Note the insertion point if necessary. */
16155 if (max_mp == NULL
16156 && mp->max_address > max_address)
16157 max_mp = mp;
16159 /* If we are inserting an 8-byte aligned quantity and
16160 we have not already found an insertion point, then
16161 make sure that all such 8-byte aligned quantities are
16162 placed at the start of the pool. */
16163 if (ARM_DOUBLEWORD_ALIGN
16164 && max_mp == NULL
16165 && fix->fix_size >= 8
16166 && mp->fix_size < 8)
16168 max_mp = mp;
16169 max_address = mp->max_address;
16173 /* The value is not currently in the minipool, so we need to create
16174 a new entry for it. If MAX_MP is NULL, the entry will be put on
16175 the end of the list since the placement is less constrained than
16176 any existing entry. Otherwise, we insert the new fix before
16177 MAX_MP and, if necessary, adjust the constraints on the other
16178 entries. */
16179 mp = XNEW (Mnode);
16180 mp->fix_size = fix->fix_size;
16181 mp->mode = fix->mode;
16182 mp->value = fix->value;
16183 mp->refcount = 1;
16184 /* Not yet required for a backwards ref. */
16185 mp->min_address = -65536;
16187 if (max_mp == NULL)
16189 mp->max_address = max_address;
16190 mp->next = NULL;
16191 mp->prev = minipool_vector_tail;
16193 if (mp->prev == NULL)
16195 minipool_vector_head = mp;
16196 minipool_vector_label = gen_label_rtx ();
16198 else
16199 mp->prev->next = mp;
16201 minipool_vector_tail = mp;
16203 else
16205 if (max_address > max_mp->max_address - mp->fix_size)
16206 mp->max_address = max_mp->max_address - mp->fix_size;
16207 else
16208 mp->max_address = max_address;
16210 mp->next = max_mp;
16211 mp->prev = max_mp->prev;
16212 max_mp->prev = mp;
16213 if (mp->prev != NULL)
16214 mp->prev->next = mp;
16215 else
16216 minipool_vector_head = mp;
16219 /* Save the new entry. */
16220 max_mp = mp;
16222 /* Scan over the preceding entries and adjust their addresses as
16223 required. */
16224 while (mp->prev != NULL
16225 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16227 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16228 mp = mp->prev;
16231 return max_mp;
16234 static Mnode *
16235 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16236 HOST_WIDE_INT min_address)
16238 HOST_WIDE_INT offset;
16240 /* The code below assumes these are different. */
16241 gcc_assert (mp != min_mp);
16243 if (min_mp == NULL)
16245 if (min_address > mp->min_address)
16246 mp->min_address = min_address;
16248 else
16250 /* We will adjust this below if it is too loose. */
16251 mp->min_address = min_address;
16253 /* Unlink MP from its current position. Since min_mp is non-null,
16254 mp->next must be non-null. */
16255 mp->next->prev = mp->prev;
16256 if (mp->prev != NULL)
16257 mp->prev->next = mp->next;
16258 else
16259 minipool_vector_head = mp->next;
16261 /* Reinsert it after MIN_MP. */
16262 mp->prev = min_mp;
16263 mp->next = min_mp->next;
16264 min_mp->next = mp;
16265 if (mp->next != NULL)
16266 mp->next->prev = mp;
16267 else
16268 minipool_vector_tail = mp;
16271 min_mp = mp;
16273 offset = 0;
16274 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16276 mp->offset = offset;
16277 if (mp->refcount > 0)
16278 offset += mp->fix_size;
16280 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16281 mp->next->min_address = mp->min_address + mp->fix_size;
16284 return min_mp;
16287 /* Add a constant to the minipool for a backward reference. Returns the
16288 node added or NULL if the constant will not fit in this pool.
16290 Note that the code for insertion for a backwards reference can be
16291 somewhat confusing because the calculated offsets for each fix do
16292 not take into account the size of the pool (which is still under
16293 construction). */
16294 static Mnode *
16295 add_minipool_backward_ref (Mfix *fix)
16297 /* If set, min_mp is the last pool_entry that has a lower constraint
16298 than the one we are trying to add. */
16299 Mnode *min_mp = NULL;
16300 /* This can be negative, since it is only a constraint. */
16301 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16302 Mnode *mp;
16304 /* If we can't reach the current pool from this insn, or if we can't
16305 insert this entry at the end of the pool without pushing other
16306 fixes out of range, then we don't try. This ensures that we
16307 can't fail later on. */
16308 if (min_address >= minipool_barrier->address
16309 || (minipool_vector_tail->min_address + fix->fix_size
16310 >= minipool_barrier->address))
16311 return NULL;
16313 /* Scan the pool to see if a constant with the same value has
16314 already been added. While we are doing this, also note the
16315 location where we must insert the constant if it doesn't already
16316 exist. */
16317 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16319 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16320 && fix->mode == mp->mode
16321 && (!LABEL_P (fix->value)
16322 || (CODE_LABEL_NUMBER (fix->value)
16323 == CODE_LABEL_NUMBER (mp->value)))
16324 && rtx_equal_p (fix->value, mp->value)
16325 /* Check that there is enough slack to move this entry to the
16326 end of the table (this is conservative). */
16327 && (mp->max_address
16328 > (minipool_barrier->address
16329 + minipool_vector_tail->offset
16330 + minipool_vector_tail->fix_size)))
16332 mp->refcount++;
16333 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16336 if (min_mp != NULL)
16337 mp->min_address += fix->fix_size;
16338 else
16340 /* Note the insertion point if necessary. */
16341 if (mp->min_address < min_address)
16343 /* For now, we do not allow the insertion of nodes requiring 8-byte
16344 alignment anywhere but at the start of the pool. */
16345 if (ARM_DOUBLEWORD_ALIGN
16346 && fix->fix_size >= 8 && mp->fix_size < 8)
16347 return NULL;
16348 else
16349 min_mp = mp;
16351 else if (mp->max_address
16352 < minipool_barrier->address + mp->offset + fix->fix_size)
16354 /* Inserting before this entry would push the fix beyond
16355 its maximum address (which can happen if we have
16356 re-located a forwards fix); force the new fix to come
16357 after it. */
16358 if (ARM_DOUBLEWORD_ALIGN
16359 && fix->fix_size >= 8 && mp->fix_size < 8)
16360 return NULL;
16361 else
16363 min_mp = mp;
16364 min_address = mp->min_address + fix->fix_size;
16367 /* Do not insert a non-8-byte aligned quantity before 8-byte
16368 aligned quantities. */
16369 else if (ARM_DOUBLEWORD_ALIGN
16370 && fix->fix_size < 8
16371 && mp->fix_size >= 8)
16373 min_mp = mp;
16374 min_address = mp->min_address + fix->fix_size;
16379 /* We need to create a new entry. */
16380 mp = XNEW (Mnode);
16381 mp->fix_size = fix->fix_size;
16382 mp->mode = fix->mode;
16383 mp->value = fix->value;
16384 mp->refcount = 1;
16385 mp->max_address = minipool_barrier->address + 65536;
16387 mp->min_address = min_address;
16389 if (min_mp == NULL)
16391 mp->prev = NULL;
16392 mp->next = minipool_vector_head;
16394 if (mp->next == NULL)
16396 minipool_vector_tail = mp;
16397 minipool_vector_label = gen_label_rtx ();
16399 else
16400 mp->next->prev = mp;
16402 minipool_vector_head = mp;
16404 else
16406 mp->next = min_mp->next;
16407 mp->prev = min_mp;
16408 min_mp->next = mp;
16410 if (mp->next != NULL)
16411 mp->next->prev = mp;
16412 else
16413 minipool_vector_tail = mp;
16416 /* Save the new entry. */
16417 min_mp = mp;
16419 if (mp->prev)
16420 mp = mp->prev;
16421 else
16422 mp->offset = 0;
16424 /* Scan over the following entries and adjust their offsets. */
16425 while (mp->next != NULL)
16427 if (mp->next->min_address < mp->min_address + mp->fix_size)
16428 mp->next->min_address = mp->min_address + mp->fix_size;
16430 if (mp->refcount)
16431 mp->next->offset = mp->offset + mp->fix_size;
16432 else
16433 mp->next->offset = mp->offset;
16435 mp = mp->next;
16438 return min_mp;
16441 static void
16442 assign_minipool_offsets (Mfix *barrier)
16444 HOST_WIDE_INT offset = 0;
16445 Mnode *mp;
16447 minipool_barrier = barrier;
16449 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16451 mp->offset = offset;
16453 if (mp->refcount > 0)
16454 offset += mp->fix_size;
16458 /* Output the literal table. */
16459 static void
16460 dump_minipool (rtx scan)
16462 Mnode * mp;
16463 Mnode * nmp;
16464 int align64 = 0;
16466 if (ARM_DOUBLEWORD_ALIGN)
16467 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16468 if (mp->refcount > 0 && mp->fix_size >= 8)
16470 align64 = 1;
16471 break;
16474 if (dump_file)
16475 fprintf (dump_file,
16476 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16477 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16479 scan = emit_label_after (gen_label_rtx (), scan);
16480 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16481 scan = emit_label_after (minipool_vector_label, scan);
16483 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16485 if (mp->refcount > 0)
16487 if (dump_file)
16489 fprintf (dump_file,
16490 ";; Offset %u, min %ld, max %ld ",
16491 (unsigned) mp->offset, (unsigned long) mp->min_address,
16492 (unsigned long) mp->max_address);
16493 arm_print_value (dump_file, mp->value);
16494 fputc ('\n', dump_file);
16497 switch (mp->fix_size)
16499 #ifdef HAVE_consttable_1
16500 case 1:
16501 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16502 break;
16504 #endif
16505 #ifdef HAVE_consttable_2
16506 case 2:
16507 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16508 break;
16510 #endif
16511 #ifdef HAVE_consttable_4
16512 case 4:
16513 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16514 break;
16516 #endif
16517 #ifdef HAVE_consttable_8
16518 case 8:
16519 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16520 break;
16522 #endif
16523 #ifdef HAVE_consttable_16
16524 case 16:
16525 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16526 break;
16528 #endif
16529 default:
16530 gcc_unreachable ();
16534 nmp = mp->next;
16535 free (mp);
16538 minipool_vector_head = minipool_vector_tail = NULL;
16539 scan = emit_insn_after (gen_consttable_end (), scan);
16540 scan = emit_barrier_after (scan);
16543 /* Return the cost of forcibly inserting a barrier after INSN. */
16544 static int
16545 arm_barrier_cost (rtx insn)
16547 /* Basing the location of the pool on the loop depth is preferable,
16548 but at the moment, the basic block information seems to be
16549 corrupted by this stage of the compilation. */
16550 int base_cost = 50;
16551 rtx next = next_nonnote_insn (insn);
16553 if (next != NULL && LABEL_P (next))
16554 base_cost -= 20;
16556 switch (GET_CODE (insn))
16558 case CODE_LABEL:
16559 /* It will always be better to place the table before the label, rather
16560 than after it. */
16561 return 50;
16563 case INSN:
16564 case CALL_INSN:
16565 return base_cost;
16567 case JUMP_INSN:
16568 return base_cost - 10;
16570 default:
16571 return base_cost + 10;
16575 /* Find the best place in the insn stream in the range
16576 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16577 Create the barrier by inserting a jump and add a new fix entry for
16578 it. */
16579 static Mfix *
16580 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16582 HOST_WIDE_INT count = 0;
16583 rtx barrier;
16584 rtx from = fix->insn;
16585 /* The instruction after which we will insert the jump. */
16586 rtx selected = NULL;
16587 int selected_cost;
16588 /* The address at which the jump instruction will be placed. */
16589 HOST_WIDE_INT selected_address;
16590 Mfix * new_fix;
16591 HOST_WIDE_INT max_count = max_address - fix->address;
16592 rtx label = gen_label_rtx ();
16594 selected_cost = arm_barrier_cost (from);
16595 selected_address = fix->address;
16597 while (from && count < max_count)
16599 rtx tmp;
16600 int new_cost;
16602 /* This code shouldn't have been called if there was a natural barrier
16603 within range. */
16604 gcc_assert (!BARRIER_P (from));
16606 /* Count the length of this insn. This must stay in sync with the
16607 code that pushes minipool fixes. */
16608 if (LABEL_P (from))
16609 count += get_label_padding (from);
16610 else
16611 count += get_attr_length (from);
16613 /* If there is a jump table, add its length. */
16614 if (tablejump_p (from, NULL, &tmp))
16616 count += get_jump_table_size (tmp);
16618 /* Jump tables aren't in a basic block, so base the cost on
16619 the dispatch insn. If we select this location, we will
16620 still put the pool after the table. */
16621 new_cost = arm_barrier_cost (from);
16623 if (count < max_count
16624 && (!selected || new_cost <= selected_cost))
16626 selected = tmp;
16627 selected_cost = new_cost;
16628 selected_address = fix->address + count;
16631 /* Continue after the dispatch table. */
16632 from = NEXT_INSN (tmp);
16633 continue;
16636 new_cost = arm_barrier_cost (from);
16638 if (count < max_count
16639 && (!selected || new_cost <= selected_cost))
16641 selected = from;
16642 selected_cost = new_cost;
16643 selected_address = fix->address + count;
16646 from = NEXT_INSN (from);
16649 /* Make sure that we found a place to insert the jump. */
16650 gcc_assert (selected);
16652 /* Make sure we do not split a call and its corresponding
16653 CALL_ARG_LOCATION note. */
16654 if (CALL_P (selected))
16656 rtx next = NEXT_INSN (selected);
16657 if (next && NOTE_P (next)
16658 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16659 selected = next;
16662 /* Create a new JUMP_INSN that branches around a barrier. */
16663 from = emit_jump_insn_after (gen_jump (label), selected);
16664 JUMP_LABEL (from) = label;
16665 barrier = emit_barrier_after (from);
16666 emit_label_after (label, barrier);
16668 /* Create a minipool barrier entry for the new barrier. */
16669 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16670 new_fix->insn = barrier;
16671 new_fix->address = selected_address;
16672 new_fix->next = fix->next;
16673 fix->next = new_fix;
16675 return new_fix;
16678 /* Record that there is a natural barrier in the insn stream at
16679 ADDRESS. */
16680 static void
16681 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16683 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16685 fix->insn = insn;
16686 fix->address = address;
16688 fix->next = NULL;
16689 if (minipool_fix_head != NULL)
16690 minipool_fix_tail->next = fix;
16691 else
16692 minipool_fix_head = fix;
16694 minipool_fix_tail = fix;
16697 /* Record INSN, which will need fixing up to load a value from the
16698 minipool. ADDRESS is the offset of the insn from the start of the
16699 function; LOC is a pointer to the part of the insn which requires
16700 fixing; VALUE is the constant that must be loaded, which is of type
16701 MODE. */
16702 static void
16703 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16704 enum machine_mode mode, rtx value)
16706 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16708 fix->insn = insn;
16709 fix->address = address;
16710 fix->loc = loc;
16711 fix->mode = mode;
16712 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16713 fix->value = value;
16714 fix->forwards = get_attr_pool_range (insn);
16715 fix->backwards = get_attr_neg_pool_range (insn);
16716 fix->minipool = NULL;
16718 /* If an insn doesn't have a range defined for it, then it isn't
16719 expecting to be reworked by this code. Better to stop now than
16720 to generate duff assembly code. */
16721 gcc_assert (fix->forwards || fix->backwards);
16723 /* If an entry requires 8-byte alignment then assume all constant pools
16724 require 4 bytes of padding. Trying to do this later on a per-pool
16725 basis is awkward because existing pool entries have to be modified. */
16726 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16727 minipool_pad = 4;
16729 if (dump_file)
16731 fprintf (dump_file,
16732 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16733 GET_MODE_NAME (mode),
16734 INSN_UID (insn), (unsigned long) address,
16735 -1 * (long)fix->backwards, (long)fix->forwards);
16736 arm_print_value (dump_file, fix->value);
16737 fprintf (dump_file, "\n");
16740 /* Add it to the chain of fixes. */
16741 fix->next = NULL;
16743 if (minipool_fix_head != NULL)
16744 minipool_fix_tail->next = fix;
16745 else
16746 minipool_fix_head = fix;
16748 minipool_fix_tail = fix;
16751 /* Return the maximum allowed cost of synthesizing a 64-bit constant inline.
16752 Returns the maximum number of insns we are prepared to use, or 99 if we
16753 always want to synthesize the value. */
16755 arm_max_const_double_inline_cost ()
16757 /* Let the value get synthesized to avoid the use of literal pools. */
16758 if (arm_disable_literal_pool)
16759 return 99;
16761 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16764 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16765 Returns the number of insns needed, or 99 if we don't know how to
16766 do it. */
16768 arm_const_double_inline_cost (rtx val)
16770 rtx lowpart, highpart;
16771 enum machine_mode mode;
16773 mode = GET_MODE (val);
16775 if (mode == VOIDmode)
16776 mode = DImode;
16778 gcc_assert (GET_MODE_SIZE (mode) == 8);
16780 lowpart = gen_lowpart (SImode, val);
16781 highpart = gen_highpart_mode (SImode, mode, val);
16783 gcc_assert (CONST_INT_P (lowpart));
16784 gcc_assert (CONST_INT_P (highpart));
16786 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16787 NULL_RTX, NULL_RTX, 0, 0)
16788 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16789 NULL_RTX, NULL_RTX, 0, 0));
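/* Illustrative example (editorial note): for the DImode constant
   0x0000004200000001 each 32-bit half (0x42 and 0x1) is a single MOV, so
   the cost computed above is 2 and the constant is cheap enough to build
   inline; a value whose halves each need several MOV/ORR steps can exceed
   arm_max_const_double_inline_cost and is loaded from memory instead.  */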
16792 /* Return true if it is worthwhile to split a 64-bit constant into two
16793 32-bit operations. This is the case if optimizing for size, or
16794 if we have load delay slots, or if one 32-bit part can be done with
16795 a single data operation. */
16796 bool
16797 arm_const_double_by_parts (rtx val)
16799 enum machine_mode mode = GET_MODE (val);
16800 rtx part;
16802 if (optimize_size || arm_ld_sched)
16803 return true;
16805 if (mode == VOIDmode)
16806 mode = DImode;
16808 part = gen_highpart_mode (SImode, mode, val);
16810 gcc_assert (CONST_INT_P (part));
16812 if (const_ok_for_arm (INTVAL (part))
16813 || const_ok_for_arm (~INTVAL (part)))
16814 return true;
16816 part = gen_lowpart (SImode, val);
16818 gcc_assert (CONST_INT_P (part));
16820 if (const_ok_for_arm (INTVAL (part))
16821 || const_ok_for_arm (~INTVAL (part)))
16822 return true;
16824 return false;
16827 /* Return true if it is possible to inline both the high and low parts
16828 of a 64-bit constant into 32-bit data processing instructions. */
16829 bool
16830 arm_const_double_by_immediates (rtx val)
16832 enum machine_mode mode = GET_MODE (val);
16833 rtx part;
16835 if (mode == VOIDmode)
16836 mode = DImode;
16838 part = gen_highpart_mode (SImode, mode, val);
16840 gcc_assert (CONST_INT_P (part));
16842 if (!const_ok_for_arm (INTVAL (part)))
16843 return false;
16845 part = gen_lowpart (SImode, val);
16847 gcc_assert (CONST_INT_P (part));
16849 if (!const_ok_for_arm (INTVAL (part)))
16850 return false;
16852 return true;
16855 /* Scan INSN and note any of its operands that need fixing.
16856 If DO_PUSHES is false we do not actually push any of the fixups
16857 needed. */
16858 static void
16859 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16861 int opno;
16863 extract_insn (insn);
16865 if (!constrain_operands (1))
16866 fatal_insn_not_found (insn);
16868 if (recog_data.n_alternatives == 0)
16869 return;
16871 /* Fill in recog_op_alt with information about the constraints of
16872 this insn. */
16873 preprocess_constraints ();
16875 for (opno = 0; opno < recog_data.n_operands; opno++)
16877 /* Things we need to fix can only occur in inputs. */
16878 if (recog_data.operand_type[opno] != OP_IN)
16879 continue;
16881 /* If this alternative is a memory reference, then any mention
16882 of constants in this alternative is really to fool reload
16883 into allowing us to accept one there. We need to fix them up
16884 now so that we output the right code. */
16885 if (recog_op_alt[opno][which_alternative].memory_ok)
16887 rtx op = recog_data.operand[opno];
16889 if (CONSTANT_P (op))
16891 if (do_pushes)
16892 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16893 recog_data.operand_mode[opno], op);
16895 else if (MEM_P (op)
16896 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16897 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16899 if (do_pushes)
16901 rtx cop = avoid_constant_pool_reference (op);
16903 /* Casting the address of something to a mode narrower
16904 than a word can cause avoid_constant_pool_reference()
16905 to return the pool reference itself. That's no good to
16906 us here. Let's just hope that we can use the
16907 constant pool value directly. */
16908 if (op == cop)
16909 cop = get_pool_constant (XEXP (op, 0));
16911 push_minipool_fix (insn, address,
16912 recog_data.operand_loc[opno],
16913 recog_data.operand_mode[opno], cop);
16920 return;
16923 /* Rewrite move insn into subtract of 0 if the condition codes will
16924 be useful in the next conditional jump insn. */
16926 static void
16927 thumb1_reorg (void)
16929 basic_block bb;
16931 FOR_EACH_BB_FN (bb, cfun)
16933 rtx dest, src;
16934 rtx pat, op0, set = NULL;
16935 rtx prev, insn = BB_END (bb);
16936 bool insn_clobbered = false;
16938 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
16939 insn = PREV_INSN (insn);
16941 /* Find the last cbranchsi4_insn in basic block BB. */
16942 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16943 continue;
16945 /* Get the register with which we are comparing. */
16946 pat = PATTERN (insn);
16947 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16949 /* Find the first flag setting insn before INSN in basic block BB. */
16950 gcc_assert (insn != BB_HEAD (bb));
16951 for (prev = PREV_INSN (insn);
16952 (!insn_clobbered
16953 && prev != BB_HEAD (bb)
16954 && (NOTE_P (prev)
16955 || DEBUG_INSN_P (prev)
16956 || ((set = single_set (prev)) != NULL
16957 && get_attr_conds (prev) == CONDS_NOCOND)));
16958 prev = PREV_INSN (prev))
16960 if (reg_set_p (op0, prev))
16961 insn_clobbered = true;
16964 /* Skip if op0 is clobbered by an insn other than PREV. */
16965 if (insn_clobbered)
16966 continue;
16968 if (!set)
16969 continue;
16971 dest = SET_DEST (set);
16972 src = SET_SRC (set);
16973 if (!low_register_operand (dest, SImode)
16974 || !low_register_operand (src, SImode))
16975 continue;
16977 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16978 in INSN. Both src and dest of the move insn are checked. */
16979 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16981 dest = copy_rtx (dest);
16982 src = copy_rtx (src);
16983 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16984 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16985 INSN_CODE (prev) = -1;
16986 /* Set test register in INSN to dest. */
16987 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16988 INSN_CODE (insn) = -1;
16993 /* Convert instructions to their cc-clobbering variant if possible, since
16994 that allows us to use smaller encodings. */
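/* Illustrative example (editorial note): when the condition codes are dead,
   rewriting the RTL for "add r0, r1, r2" so that it also clobbers CC lets
   the output templates use "adds r0, r1, r2", which has a 16-bit Thumb-2
   encoding, whereas the non-flag-setting form needs a 32-bit encoding.  */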
16996 static void
16997 thumb2_reorg (void)
16999 basic_block bb;
17000 regset_head live;
17002 INIT_REG_SET (&live);
17004 /* We are freeing block_for_insn in the toplev to keep compatibility
17005 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17006 compute_bb_for_insn ();
17007 df_analyze ();
17009 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17011 FOR_EACH_BB_FN (bb, cfun)
17013 if (current_tune->disparage_flag_setting_t16_encodings
17014 && optimize_bb_for_speed_p (bb))
17015 continue;
17017 rtx insn;
17018 Convert_Action action = SKIP;
17019 Convert_Action action_for_partial_flag_setting
17020 = (current_tune->disparage_partial_flag_setting_t16_encodings
17021 && optimize_bb_for_speed_p (bb))
17022 ? SKIP : CONV;
17024 COPY_REG_SET (&live, DF_LR_OUT (bb));
17025 df_simulate_initialize_backwards (bb, &live);
17026 FOR_BB_INSNS_REVERSE (bb, insn)
17028 if (NONJUMP_INSN_P (insn)
17029 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17030 && GET_CODE (PATTERN (insn)) == SET)
17032 action = SKIP;
17033 rtx pat = PATTERN (insn);
17034 rtx dst = XEXP (pat, 0);
17035 rtx src = XEXP (pat, 1);
17036 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17038 if (!OBJECT_P (src))
17039 op0 = XEXP (src, 0);
17041 if (BINARY_P (src))
17042 op1 = XEXP (src, 1);
17044 if (low_register_operand (dst, SImode))
17046 switch (GET_CODE (src))
17048 case PLUS:
17049 /* Adding two registers and storing the result
17050 in the first source is already a 16-bit
17051 operation. */
17052 if (rtx_equal_p (dst, op0)
17053 && register_operand (op1, SImode))
17054 break;
17056 if (low_register_operand (op0, SImode))
17058 /* ADDS <Rd>,<Rn>,<Rm> */
17059 if (low_register_operand (op1, SImode))
17060 action = CONV;
17061 /* ADDS <Rdn>,#<imm8> */
17062 /* SUBS <Rdn>,#<imm8> */
17063 else if (rtx_equal_p (dst, op0)
17064 && CONST_INT_P (op1)
17065 && IN_RANGE (INTVAL (op1), -255, 255))
17066 action = CONV;
17067 /* ADDS <Rd>,<Rn>,#<imm3> */
17068 /* SUBS <Rd>,<Rn>,#<imm3> */
17069 else if (CONST_INT_P (op1)
17070 && IN_RANGE (INTVAL (op1), -7, 7))
17071 action = CONV;
17073 /* ADCS <Rd>, <Rn> */
17074 else if (GET_CODE (XEXP (src, 0)) == PLUS
17075 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17076 && low_register_operand (XEXP (XEXP (src, 0), 1),
17077 SImode)
17078 && COMPARISON_P (op1)
17079 && cc_register (XEXP (op1, 0), VOIDmode)
17080 && maybe_get_arm_condition_code (op1) == ARM_CS
17081 && XEXP (op1, 1) == const0_rtx)
17082 action = CONV;
17083 break;
17085 case MINUS:
17086 /* RSBS <Rd>,<Rn>,#0
17087 Not handled here: see NEG below. */
17088 /* SUBS <Rd>,<Rn>,#<imm3>
17089 SUBS <Rdn>,#<imm8>
17090 Not handled here: see PLUS above. */
17091 /* SUBS <Rd>,<Rn>,<Rm> */
17092 if (low_register_operand (op0, SImode)
17093 && low_register_operand (op1, SImode))
17094 action = CONV;
17095 break;
17097 case MULT:
17098 /* MULS <Rdm>,<Rn>,<Rdm>
17099 As an exception to the rule, this is only used
17100 when optimizing for size since MULS is slow on all
17101 known implementations. We do not even want to use
17102 MULS in cold code, if optimizing for speed, so we
17103 test the global flag here. */
17104 if (!optimize_size)
17105 break;
17106 /* else fall through. */
17107 case AND:
17108 case IOR:
17109 case XOR:
17110 /* ANDS <Rdn>,<Rm> */
17111 if (rtx_equal_p (dst, op0)
17112 && low_register_operand (op1, SImode))
17113 action = action_for_partial_flag_setting;
17114 else if (rtx_equal_p (dst, op1)
17115 && low_register_operand (op0, SImode))
17116 action = action_for_partial_flag_setting == SKIP
17117 ? SKIP : SWAP_CONV;
17118 break;
17120 case ASHIFTRT:
17121 case ASHIFT:
17122 case LSHIFTRT:
17123 /* ASRS <Rdn>,<Rm> */
17124 /* LSRS <Rdn>,<Rm> */
17125 /* LSLS <Rdn>,<Rm> */
17126 if (rtx_equal_p (dst, op0)
17127 && low_register_operand (op1, SImode))
17128 action = action_for_partial_flag_setting;
17129 /* ASRS <Rd>,<Rm>,#<imm5> */
17130 /* LSRS <Rd>,<Rm>,#<imm5> */
17131 /* LSLS <Rd>,<Rm>,#<imm5> */
17132 else if (low_register_operand (op0, SImode)
17133 && CONST_INT_P (op1)
17134 && IN_RANGE (INTVAL (op1), 0, 31))
17135 action = action_for_partial_flag_setting;
17136 break;
17138 case ROTATERT:
17139 /* RORS <Rdn>,<Rm> */
17140 if (rtx_equal_p (dst, op0)
17141 && low_register_operand (op1, SImode))
17142 action = action_for_partial_flag_setting;
17143 break;
17145 case NOT:
17146 /* MVNS <Rd>,<Rm> */
17147 if (low_register_operand (op0, SImode))
17148 action = action_for_partial_flag_setting;
17149 break;
17151 case NEG:
17152 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17153 if (low_register_operand (op0, SImode))
17154 action = CONV;
17155 break;
17157 case CONST_INT:
17158 /* MOVS <Rd>,#<imm8> */
17159 if (CONST_INT_P (src)
17160 && IN_RANGE (INTVAL (src), 0, 255))
17161 action = action_for_partial_flag_setting;
17162 break;
17164 case REG:
17165 /* MOVS and MOV<c> with registers have different
17166 encodings, so are not relevant here. */
17167 break;
17169 default:
17170 break;
17174 if (action != SKIP)
17176 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17177 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17178 rtvec vec;
17180 if (action == SWAP_CONV)
17182 src = copy_rtx (src);
17183 XEXP (src, 0) = op1;
17184 XEXP (src, 1) = op0;
17185 pat = gen_rtx_SET (VOIDmode, dst, src);
17186 vec = gen_rtvec (2, pat, clobber);
17188 else /* action == CONV */
17189 vec = gen_rtvec (2, pat, clobber);
17191 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17192 INSN_CODE (insn) = -1;
17196 if (NONDEBUG_INSN_P (insn))
17197 df_simulate_one_insn_backwards (bb, insn, &live);
17201 CLEAR_REG_SET (&live);
17204 /* Gcc puts the pool in the wrong place for ARM, since we can only
17205 load addresses a limited distance around the pc. We do some
17206 special munging to move the constant pool values to the correct
17207 point in the code. */
17208 static void
17209 arm_reorg (void)
17211 rtx insn;
17212 HOST_WIDE_INT address = 0;
17213 Mfix * fix;
17215 if (TARGET_THUMB1)
17216 thumb1_reorg ();
17217 else if (TARGET_THUMB2)
17218 thumb2_reorg ();
17220 /* Ensure all insns that must be split have been split at this point.
17221 Otherwise, the pool placement code below may compute incorrect
17222 insn lengths. Note that when optimizing, all insns have already
17223 been split at this point. */
17224 if (!optimize)
17225 split_all_insns_noflow ();
17227 minipool_fix_head = minipool_fix_tail = NULL;
17229 /* The first insn must always be a note, or the code below won't
17230 scan it properly. */
17231 insn = get_insns ();
17232 gcc_assert (NOTE_P (insn));
17233 minipool_pad = 0;
17235 /* Scan all the insns and record the operands that will need fixing. */
17236 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17238 if (BARRIER_P (insn))
17239 push_minipool_barrier (insn, address);
17240 else if (INSN_P (insn))
17242 rtx table;
17244 note_invalid_constants (insn, address, true);
17245 address += get_attr_length (insn);
17247 /* If the insn is a vector jump, add the size of the table
17248 and skip the table. */
17249 if (tablejump_p (insn, NULL, &table))
17251 address += get_jump_table_size (table);
17252 insn = table;
17255 else if (LABEL_P (insn))
17256 /* Add the worst-case padding due to alignment. We don't add
17257 the _current_ padding because the minipool insertions
17258 themselves might change it. */
17259 address += get_label_padding (insn);
17262 fix = minipool_fix_head;
17264 /* Now scan the fixups and perform the required changes. */
17265 while (fix)
17267 Mfix * ftmp;
17268 Mfix * fdel;
17269 Mfix * last_added_fix;
17270 Mfix * last_barrier = NULL;
17271 Mfix * this_fix;
17273 /* Skip any further barriers before the next fix. */
17274 while (fix && BARRIER_P (fix->insn))
17275 fix = fix->next;
17277 /* No more fixes. */
17278 if (fix == NULL)
17279 break;
17281 last_added_fix = NULL;
17283 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17285 if (BARRIER_P (ftmp->insn))
17287 if (ftmp->address >= minipool_vector_head->max_address)
17288 break;
17290 last_barrier = ftmp;
17292 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17293 break;
17295 last_added_fix = ftmp; /* Keep track of the last fix added. */
17298 /* If we found a barrier, drop back to that; any fixes that we
17299 could have reached but come after the barrier will now go in
17300 the next mini-pool. */
17301 if (last_barrier != NULL)
17303 /* Reduce the refcount for those fixes that won't go into this
17304 pool after all. */
17305 for (fdel = last_barrier->next;
17306 fdel && fdel != ftmp;
17307 fdel = fdel->next)
17309 fdel->minipool->refcount--;
17310 fdel->minipool = NULL;
17313 ftmp = last_barrier;
17315 else
17317 /* ftmp is the first fix that we can't fit into this pool and
17318 there are no natural barriers that we could use. Insert a
17319 new barrier in the code somewhere between the previous
17320 fix and this one, and arrange to jump around it. */
17321 HOST_WIDE_INT max_address;
17323 /* The last item on the list of fixes must be a barrier, so
17324 we can never run off the end of the list of fixes without
17325 last_barrier being set. */
17326 gcc_assert (ftmp);
17328 max_address = minipool_vector_head->max_address;
17329 /* Check that there isn't another fix that is in range that
17330 we couldn't fit into this pool because the pool was
17331 already too large: we need to put the pool before such an
17332 instruction. The pool itself may come just after the
17333 fix because create_fix_barrier also allows space for a
17334 jump instruction. */
17335 if (ftmp->address < max_address)
17336 max_address = ftmp->address + 1;
17338 last_barrier = create_fix_barrier (last_added_fix, max_address);
17341 assign_minipool_offsets (last_barrier);
17343 while (ftmp)
17345 if (!BARRIER_P (ftmp->insn)
17346 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17347 == NULL))
17348 break;
17350 ftmp = ftmp->next;
17353 /* Scan over the fixes we have identified for this pool, fixing them
17354 up and adding the constants to the pool itself. */
17355 for (this_fix = fix; this_fix && ftmp != this_fix;
17356 this_fix = this_fix->next)
17357 if (!BARRIER_P (this_fix->insn))
17359 rtx addr
17360 = plus_constant (Pmode,
17361 gen_rtx_LABEL_REF (VOIDmode,
17362 minipool_vector_label),
17363 this_fix->minipool->offset);
17364 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17367 dump_minipool (last_barrier->insn);
17368 fix = ftmp;
17371 /* From now on we must synthesize any constants that we can't handle
17372 directly. This can happen if the RTL gets split during final
17373 instruction generation. */
17374 cfun->machine->after_arm_reorg = 1;
17376 /* Free the minipool memory. */
17377 obstack_free (&minipool_obstack, minipool_startobj);
17380 /* Routines to output assembly language. */
17382 /* If the rtx is the correct value then return the string of the number.
17383 In this way we can ensure that valid double constants are generated even
17384 when cross compiling. */
17385 const char *
17386 fp_immediate_constant (rtx x)
17388 REAL_VALUE_TYPE r;
17390 if (!fp_consts_inited)
17391 init_fp_table ();
17393 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17395 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17396 return "0";
17399 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17400 static const char *
17401 fp_const_from_val (REAL_VALUE_TYPE *r)
17403 if (!fp_consts_inited)
17404 init_fp_table ();
17406 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17407 return "0";
17410 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17411 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17412 is in the list, and UPDATE is true iff the list contains an explicit
17413 update of the base register. */
17414 void
17415 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17416 bool update)
17418 int i;
17419 char pattern[100];
17420 int offset;
17421 const char *conditional;
17422 int num_saves = XVECLEN (operands[0], 0);
17423 unsigned int regno;
17424 unsigned int regno_base = REGNO (operands[1]);
17426 offset = 0;
17427 offset += update ? 1 : 0;
17428 offset += return_pc ? 1 : 0;
17430 /* Is the base register in the list? */
17431 for (i = offset; i < num_saves; i++)
17433 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17434 /* If SP is in the list, then the base register must be SP. */
17435 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17436 /* If base register is in the list, there must be no explicit update. */
17437 if (regno == regno_base)
17438 gcc_assert (!update);
17441 conditional = reverse ? "%?%D0" : "%?%d0";
17442 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17444	  /* Output pop (not ldmfd) because it has a shorter encoding. */
17445 gcc_assert (update);
17446 sprintf (pattern, "pop%s\t{", conditional);
17448 else
17450 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17451	 It's just a convention; their semantics are identical. */
17452 if (regno_base == SP_REGNUM)
17453 sprintf (pattern, "ldm%sfd\t", conditional);
17454 else if (TARGET_UNIFIED_ASM)
17455 sprintf (pattern, "ldmia%s\t", conditional);
17456 else
17457 sprintf (pattern, "ldm%sia\t", conditional);
17459 strcat (pattern, reg_names[regno_base]);
17460 if (update)
17461 strcat (pattern, "!, {");
17462 else
17463 strcat (pattern, ", {");
17466 /* Output the first destination register. */
17467 strcat (pattern,
17468 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17470 /* Output the rest of the destination registers. */
17471 for (i = offset + 1; i < num_saves; i++)
17473 strcat (pattern, ", ");
17474 strcat (pattern,
17475 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17478 strcat (pattern, "}");
17480 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17481 strcat (pattern, "^");
17483 output_asm_insn (pattern, &cond);
17487 /* Output the assembly for a store multiple. */
17489 const char *
17490 vfp_output_fstmd (rtx * operands)
17492 char pattern[100];
17493 int p;
17494 int base;
17495 int i;
17497 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17498 p = strlen (pattern);
17500 gcc_assert (REG_P (operands[1]));
17502 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17503 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17505 p += sprintf (&pattern[p], ", d%d", base + i);
17507 strcpy (&pattern[p], "}");
17509 output_asm_insn (pattern, operands);
17510 return "";
17514	/* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17515 number of bytes pushed. */
17517 static int
17518 vfp_emit_fstmd (int base_reg, int count)
17520 rtx par;
17521 rtx dwarf;
17522 rtx tmp, reg;
17523 int i;
17525	  /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17526 register pairs are stored by a store multiple insn. We avoid this
17527 by pushing an extra pair. */
17528 if (count == 2 && !arm_arch6)
17530 if (base_reg == LAST_VFP_REGNUM - 3)
17531 base_reg -= 2;
17532 count++;
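  /* For example (register numbers are only illustrative), a request to
     store exactly {d14, d15} is widened to {d13, d14, d15}, and a request
     for {d8, d9} becomes {d8, d9, d10}, so we never store exactly two
     register pairs.  */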
17535 /* FSTMD may not store more than 16 doubleword registers at once. Split
17536 larger stores into multiple parts (up to a maximum of two, in
17537 practice). */
17538 if (count > 16)
17540 int saved;
17541 /* NOTE: base_reg is an internal register number, so each D register
17542 counts as 2. */
17543 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17544 saved += vfp_emit_fstmd (base_reg, 16);
17545 return saved;
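  /* For example, a request for 20 register pairs is emitted as a store of
     the upper 4 pairs followed by a store of the lower 16, each handled by
     a further call to this function.  */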
17548 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17549 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17551 reg = gen_rtx_REG (DFmode, base_reg);
17552 base_reg += 2;
17554 XVECEXP (par, 0, 0)
17555 = gen_rtx_SET (VOIDmode,
17556 gen_frame_mem
17557 (BLKmode,
17558 gen_rtx_PRE_MODIFY (Pmode,
17559 stack_pointer_rtx,
17560 plus_constant
17561 (Pmode, stack_pointer_rtx,
17562 - (count * 8)))
17564 gen_rtx_UNSPEC (BLKmode,
17565 gen_rtvec (1, reg),
17566 UNSPEC_PUSH_MULT));
17568 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17569 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17570 RTX_FRAME_RELATED_P (tmp) = 1;
17571 XVECEXP (dwarf, 0, 0) = tmp;
17573 tmp = gen_rtx_SET (VOIDmode,
17574 gen_frame_mem (DFmode, stack_pointer_rtx),
17575 reg);
17576 RTX_FRAME_RELATED_P (tmp) = 1;
17577 XVECEXP (dwarf, 0, 1) = tmp;
17579 for (i = 1; i < count; i++)
17581 reg = gen_rtx_REG (DFmode, base_reg);
17582 base_reg += 2;
17583 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17585 tmp = gen_rtx_SET (VOIDmode,
17586 gen_frame_mem (DFmode,
17587 plus_constant (Pmode,
17588 stack_pointer_rtx,
17589 i * 8)),
17590 reg);
17591 RTX_FRAME_RELATED_P (tmp) = 1;
17592 XVECEXP (dwarf, 0, i + 1) = tmp;
17595 par = emit_insn (par);
17596 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17597 RTX_FRAME_RELATED_P (par) = 1;
17599 return count * 8;
17602 /* Emit a call instruction with pattern PAT. ADDR is the address of
17603 the call target. */
17605 void
17606 arm_emit_call_insn (rtx pat, rtx addr)
17608 rtx insn;
17610 insn = emit_call_insn (pat);
17612 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17613 If the call might use such an entry, add a use of the PIC register
17614 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17615 if (TARGET_VXWORKS_RTP
17616 && flag_pic
17617 && GET_CODE (addr) == SYMBOL_REF
17618 && (SYMBOL_REF_DECL (addr)
17619 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17620 : !SYMBOL_REF_LOCAL_P (addr)))
17622 require_pic_register ();
17623 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17627 /* Output a 'call' insn. */
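/* A sketch of the typical output (the register is illustrative): calling
   through r3 on an ARMv4T or interworking target gives
       mov     lr, pc
       bx      r3
   while older cores fall back to "mov pc, r3" for the jump.  */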
17628 const char *
17629 output_call (rtx *operands)
17631 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17633 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17634 if (REGNO (operands[0]) == LR_REGNUM)
17636 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17637 output_asm_insn ("mov%?\t%0, %|lr", operands);
17640 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17642 if (TARGET_INTERWORK || arm_arch4t)
17643 output_asm_insn ("bx%?\t%0", operands);
17644 else
17645 output_asm_insn ("mov%?\t%|pc, %0", operands);
17647 return "";
17650 /* Output a 'call' insn that is a reference in memory. This is
17651	   disabled for ARMv5 and later, where we prefer a blx instead because it
17652	   avoids a significant performance overhead.
17653 const char *
17654 output_call_mem (rtx *operands)
17656 gcc_assert (!arm_arch5);
17657 if (TARGET_INTERWORK)
17659 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17660 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17661 output_asm_insn ("bx%?\t%|ip", operands);
17663 else if (regno_use_in (LR_REGNUM, operands[0]))
17665 /* LR is used in the memory address. We load the address in the
17666 first instruction. It's safe to use IP as the target of the
17667 load since the call will kill it anyway. */
17668 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17669 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17670 if (arm_arch4t)
17671 output_asm_insn ("bx%?\t%|ip", operands);
17672 else
17673 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17675 else
17677 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17678 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17681 return "";
17685 /* Output a move from arm registers to arm registers of a long double
17686 OPERANDS[0] is the destination.
17687 OPERANDS[1] is the source. */
17688 const char *
17689 output_mov_long_double_arm_from_arm (rtx *operands)
17691 /* We have to be careful here because the two might overlap. */
17692 int dest_start = REGNO (operands[0]);
17693 int src_start = REGNO (operands[1]);
17694 rtx ops[2];
17695 int i;
17697 if (dest_start < src_start)
17699 for (i = 0; i < 3; i++)
17701 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17702 ops[1] = gen_rtx_REG (SImode, src_start + i);
17703 output_asm_insn ("mov%?\t%0, %1", ops);
17706 else
17708 for (i = 2; i >= 0; i--)
17710 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17711 ops[1] = gen_rtx_REG (SImode, src_start + i);
17712 output_asm_insn ("mov%?\t%0, %1", ops);
17716 return "";
17719 void
17720 arm_emit_movpair (rtx dest, rtx src)
17722 /* If the src is an immediate, simplify it. */
17723 if (CONST_INT_P (src))
17725 HOST_WIDE_INT val = INTVAL (src);
17726 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17727 if ((val >> 16) & 0x0000ffff)
17728 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17729 GEN_INT (16)),
17730 GEN_INT ((val >> 16) & 0x0000ffff));
17731 return;
17733 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17734 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
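/* Illustrative example: arm_emit_movpair (dest, GEN_INT (0x12345678))
   first sets DEST to the low halfword 0x5678 and then fills the high
   halfword 0x1234 through the ZERO_EXTRACT, which normally assembles to a
   movw/movt pair; if the high halfword is zero the second insn is
   omitted.  */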
17737 /* Output a move between double words. It must be REG<-MEM
17738 or MEM<-REG. */
17739 const char *
17740 output_move_double (rtx *operands, bool emit, int *count)
17742 enum rtx_code code0 = GET_CODE (operands[0]);
17743 enum rtx_code code1 = GET_CODE (operands[1]);
17744 rtx otherops[3];
17745 if (count)
17746 *count = 1;
17748 /* The only case when this might happen is when
17749 you are looking at the length of a DImode instruction
17750 that has an invalid constant in it. */
17751 if (code0 == REG && code1 != MEM)
17753 gcc_assert (!emit);
17754 *count = 2;
17755 return "";
17758 if (code0 == REG)
17760 unsigned int reg0 = REGNO (operands[0]);
17762 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17764 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17766 switch (GET_CODE (XEXP (operands[1], 0)))
17768 case REG:
17770 if (emit)
17772 if (TARGET_LDRD
17773 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17774 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17775 else
17776 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17778 break;
17780 case PRE_INC:
17781 gcc_assert (TARGET_LDRD);
17782 if (emit)
17783 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17784 break;
17786 case PRE_DEC:
17787 if (emit)
17789 if (TARGET_LDRD)
17790 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17791 else
17792 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17794 break;
17796 case POST_INC:
17797 if (emit)
17799 if (TARGET_LDRD)
17800 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17801 else
17802 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17804 break;
17806 case POST_DEC:
17807 gcc_assert (TARGET_LDRD);
17808 if (emit)
17809 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17810 break;
17812 case PRE_MODIFY:
17813 case POST_MODIFY:
17814	      /* Autoincrement addressing modes should never have overlapping
17815 base and destination registers, and overlapping index registers
17816 are already prohibited, so this doesn't need to worry about
17817 fix_cm3_ldrd. */
17818 otherops[0] = operands[0];
17819 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17820 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17822 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17824 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17826 /* Registers overlap so split out the increment. */
17827 if (emit)
17829 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17830 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17832 if (count)
17833 *count = 2;
17835 else
17837 /* Use a single insn if we can.
17838 FIXME: IWMMXT allows offsets larger than ldrd can
17839 handle, fix these up with a pair of ldr. */
17840 if (TARGET_THUMB2
17841 || !CONST_INT_P (otherops[2])
17842 || (INTVAL (otherops[2]) > -256
17843 && INTVAL (otherops[2]) < 256))
17845 if (emit)
17846 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17848 else
17850 if (emit)
17852 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17853 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17855 if (count)
17856 *count = 2;
17861 else
17863 /* Use a single insn if we can.
17864 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17865 fix these up with a pair of ldr. */
17866 if (TARGET_THUMB2
17867 || !CONST_INT_P (otherops[2])
17868 || (INTVAL (otherops[2]) > -256
17869 && INTVAL (otherops[2]) < 256))
17871 if (emit)
17872 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17874 else
17876 if (emit)
17878 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17879 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17881 if (count)
17882 *count = 2;
17885 break;
17887 case LABEL_REF:
17888 case CONST:
17889 /* We might be able to use ldrd %0, %1 here. However the range is
17890 different to ldr/adr, and it is broken on some ARMv7-M
17891 implementations. */
17892 /* Use the second register of the pair to avoid problematic
17893 overlap. */
17894 otherops[1] = operands[1];
17895 if (emit)
17896 output_asm_insn ("adr%?\t%0, %1", otherops);
17897 operands[1] = otherops[0];
17898 if (emit)
17900 if (TARGET_LDRD)
17901 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17902 else
17903 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17906 if (count)
17907 *count = 2;
17908 break;
17910 /* ??? This needs checking for thumb2. */
17911 default:
17912 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17913 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17915 otherops[0] = operands[0];
17916 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17917 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17919 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17921 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17923 switch ((int) INTVAL (otherops[2]))
17925 case -8:
17926 if (emit)
17927 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17928 return "";
17929 case -4:
17930 if (TARGET_THUMB2)
17931 break;
17932 if (emit)
17933 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17934 return "";
17935 case 4:
17936 if (TARGET_THUMB2)
17937 break;
17938 if (emit)
17939 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17940 return "";
17943 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17944 operands[1] = otherops[0];
17945 if (TARGET_LDRD
17946 && (REG_P (otherops[2])
17947 || TARGET_THUMB2
17948 || (CONST_INT_P (otherops[2])
17949 && INTVAL (otherops[2]) > -256
17950 && INTVAL (otherops[2]) < 256)))
17952 if (reg_overlap_mentioned_p (operands[0],
17953 otherops[2]))
17955 rtx tmp;
17956 /* Swap base and index registers over to
17957 avoid a conflict. */
17958 tmp = otherops[1];
17959 otherops[1] = otherops[2];
17960 otherops[2] = tmp;
17962 /* If both registers conflict, it will usually
17963 have been fixed by a splitter. */
17964 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17965 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17967 if (emit)
17969 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17970 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17972 if (count)
17973 *count = 2;
17975 else
17977 otherops[0] = operands[0];
17978 if (emit)
17979 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17981 return "";
17984 if (CONST_INT_P (otherops[2]))
17986 if (emit)
17988 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17989 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17990 else
17991 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17994 else
17996 if (emit)
17997 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18000 else
18002 if (emit)
18003 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18006 if (count)
18007 *count = 2;
18009 if (TARGET_LDRD)
18010 return "ldr%(d%)\t%0, [%1]";
18012 return "ldm%(ia%)\t%1, %M0";
18014 else
18016 otherops[1] = adjust_address (operands[1], SImode, 4);
18017 /* Take care of overlapping base/data reg. */
18018 if (reg_mentioned_p (operands[0], operands[1]))
18020 if (emit)
18022 output_asm_insn ("ldr%?\t%0, %1", otherops);
18023 output_asm_insn ("ldr%?\t%0, %1", operands);
18025 if (count)
18026 *count = 2;
18029 else
18031 if (emit)
18033 output_asm_insn ("ldr%?\t%0, %1", operands);
18034 output_asm_insn ("ldr%?\t%0, %1", otherops);
18036 if (count)
18037 *count = 2;
18042 else
18044 /* Constraints should ensure this. */
18045 gcc_assert (code0 == MEM && code1 == REG);
18046 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18047 || (TARGET_ARM && TARGET_LDRD));
18049 switch (GET_CODE (XEXP (operands[0], 0)))
18051 case REG:
18052 if (emit)
18054 if (TARGET_LDRD)
18055 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18056 else
18057 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18059 break;
18061 case PRE_INC:
18062 gcc_assert (TARGET_LDRD);
18063 if (emit)
18064 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18065 break;
18067 case PRE_DEC:
18068 if (emit)
18070 if (TARGET_LDRD)
18071 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18072 else
18073 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18075 break;
18077 case POST_INC:
18078 if (emit)
18080 if (TARGET_LDRD)
18081 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18082 else
18083 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18085 break;
18087 case POST_DEC:
18088 gcc_assert (TARGET_LDRD);
18089 if (emit)
18090 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18091 break;
18093 case PRE_MODIFY:
18094 case POST_MODIFY:
18095 otherops[0] = operands[1];
18096 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18097 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18099	      /* IWMMXT allows offsets larger than strd can handle,
18100	 fix these up with a pair of str. */
18101 if (!TARGET_THUMB2
18102 && CONST_INT_P (otherops[2])
18103 && (INTVAL(otherops[2]) <= -256
18104 || INTVAL(otherops[2]) >= 256))
18106 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18108 if (emit)
18110 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18111 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18113 if (count)
18114 *count = 2;
18116 else
18118 if (emit)
18120 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18121 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18123 if (count)
18124 *count = 2;
18127 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18129 if (emit)
18130 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18132 else
18134 if (emit)
18135 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18137 break;
18139 case PLUS:
18140 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18141 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18143 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18145 case -8:
18146 if (emit)
18147 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18148 return "";
18150 case -4:
18151 if (TARGET_THUMB2)
18152 break;
18153 if (emit)
18154 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18155 return "";
18157 case 4:
18158 if (TARGET_THUMB2)
18159 break;
18160 if (emit)
18161 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18162 return "";
18165 if (TARGET_LDRD
18166 && (REG_P (otherops[2])
18167 || TARGET_THUMB2
18168 || (CONST_INT_P (otherops[2])
18169 && INTVAL (otherops[2]) > -256
18170 && INTVAL (otherops[2]) < 256)))
18172 otherops[0] = operands[1];
18173 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18174 if (emit)
18175 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18176 return "";
18178 /* Fall through */
18180 default:
18181 otherops[0] = adjust_address (operands[0], SImode, 4);
18182 otherops[1] = operands[1];
18183 if (emit)
18185 output_asm_insn ("str%?\t%1, %0", operands);
18186 output_asm_insn ("str%?\t%H1, %0", otherops);
18188 if (count)
18189 *count = 2;
18193 return "";
18196 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18197 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18199 const char *
18200 output_move_quad (rtx *operands)
18202 if (REG_P (operands[0]))
18204 /* Load, or reg->reg move. */
18206 if (MEM_P (operands[1]))
18208 switch (GET_CODE (XEXP (operands[1], 0)))
18210 case REG:
18211 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18212 break;
18214 case LABEL_REF:
18215 case CONST:
18216 output_asm_insn ("adr%?\t%0, %1", operands);
18217 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18218 break;
18220 default:
18221 gcc_unreachable ();
18224 else
18226 rtx ops[2];
18227 int dest, src, i;
18229 gcc_assert (REG_P (operands[1]));
18231 dest = REGNO (operands[0]);
18232 src = REGNO (operands[1]);
18234 /* This seems pretty dumb, but hopefully GCC won't try to do it
18235 very often. */
18236 if (dest < src)
18237 for (i = 0; i < 4; i++)
18239 ops[0] = gen_rtx_REG (SImode, dest + i);
18240 ops[1] = gen_rtx_REG (SImode, src + i);
18241 output_asm_insn ("mov%?\t%0, %1", ops);
18243 else
18244 for (i = 3; i >= 0; i--)
18246 ops[0] = gen_rtx_REG (SImode, dest + i);
18247 ops[1] = gen_rtx_REG (SImode, src + i);
18248 output_asm_insn ("mov%?\t%0, %1", ops);
18252 else
18254 gcc_assert (MEM_P (operands[0]));
18255 gcc_assert (REG_P (operands[1]));
18256 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18258 switch (GET_CODE (XEXP (operands[0], 0)))
18260 case REG:
18261 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18262 break;
18264 default:
18265 gcc_unreachable ();
18269 return "";
18272 /* Output a VFP load or store instruction. */
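/* Rough examples of the output (register numbers are illustrative): a
   DFmode load from [r0, #8] into d7 prints as "fldd d7, [r0, #8]", an
   SFmode store of s1 prints as "fsts s1, [r0]", and integer modes get a
   trailing "@ int" comment.  */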
18274 const char *
18275 output_move_vfp (rtx *operands)
18277 rtx reg, mem, addr, ops[2];
18278 int load = REG_P (operands[0]);
18279 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18280 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18281 const char *templ;
18282 char buff[50];
18283 enum machine_mode mode;
18285 reg = operands[!load];
18286 mem = operands[load];
18288 mode = GET_MODE (reg);
18290 gcc_assert (REG_P (reg));
18291 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18292 gcc_assert (mode == SFmode
18293 || mode == DFmode
18294 || mode == SImode
18295 || mode == DImode
18296 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18297 gcc_assert (MEM_P (mem));
18299 addr = XEXP (mem, 0);
18301 switch (GET_CODE (addr))
18303 case PRE_DEC:
18304 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18305 ops[0] = XEXP (addr, 0);
18306 ops[1] = reg;
18307 break;
18309 case POST_INC:
18310 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18311 ops[0] = XEXP (addr, 0);
18312 ops[1] = reg;
18313 break;
18315 default:
18316 templ = "f%s%c%%?\t%%%s0, %%1%s";
18317 ops[0] = reg;
18318 ops[1] = mem;
18319 break;
18322 sprintf (buff, templ,
18323 load ? "ld" : "st",
18324 dp ? 'd' : 's',
18325 dp ? "P" : "",
18326 integer_p ? "\t%@ int" : "");
18327 output_asm_insn (buff, ops);
18329 return "";
18332 /* Output a Neon double-word or quad-word load or store, or a load
18333 or store for larger structure modes.
18335 WARNING: The ordering of elements is weird in big-endian mode,
18336 because the EABI requires that vectors stored in memory appear
18337	   as though they were stored by a VSTM instruction.
18338 GCC RTL defines element ordering based on in-memory order.
18339 This can be different from the architectural ordering of elements
18340 within a NEON register. The intrinsics defined in arm_neon.h use the
18341 NEON register element ordering, not the GCC RTL element ordering.
18343	   For example, the in-memory ordering of a big-endian quadword
18344 vector with 16-bit elements when stored from register pair {d0,d1}
18345 will be (lowest address first, d0[N] is NEON register element N):
18347 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18349 When necessary, quadword registers (dN, dN+1) are moved to ARM
18350 registers from rN in the order:
18352 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18354 So that STM/LDM can be used on vectors in ARM registers, and the
18355 same memory layout will result as if VSTM/VLDM were used.
18357 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18358 possible, which allows use of appropriate alignment tags.
18359 Note that the choice of "64" is independent of the actual vector
18360 element size; this size simply ensures that the behavior is
18361 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18363 Due to limitations of those instructions, use of VST1.64/VLD1.64
18364 is not possible if:
18365 - the address contains PRE_DEC, or
18366 - the mode refers to more than 4 double-word registers
18368 In those cases, it would be possible to replace VSTM/VLDM by a
18369 sequence of instructions; this is not currently implemented since
18370 this is not certain to actually improve performance. */
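/* As a rough illustration (register numbers are arbitrary), a quad-word
   store of q0 through a plain register address comes out via the
   "v%s1.64" template as something like "vst1.64 {d0, d1}, [r0:64]",
   whereas modes needing more than four D registers, or a PRE_DEC address,
   fall back to the vstmia / vstmdb forms.  */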
18372 const char *
18373 output_move_neon (rtx *operands)
18375 rtx reg, mem, addr, ops[2];
18376 int regno, nregs, load = REG_P (operands[0]);
18377 const char *templ;
18378 char buff[50];
18379 enum machine_mode mode;
18381 reg = operands[!load];
18382 mem = operands[load];
18384 mode = GET_MODE (reg);
18386 gcc_assert (REG_P (reg));
18387 regno = REGNO (reg);
18388 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18389 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18390 || NEON_REGNO_OK_FOR_QUAD (regno));
18391 gcc_assert (VALID_NEON_DREG_MODE (mode)
18392 || VALID_NEON_QREG_MODE (mode)
18393 || VALID_NEON_STRUCT_MODE (mode));
18394 gcc_assert (MEM_P (mem));
18396 addr = XEXP (mem, 0);
18398 /* Strip off const from addresses like (const (plus (...))). */
18399 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18400 addr = XEXP (addr, 0);
18402 switch (GET_CODE (addr))
18404 case POST_INC:
18405 /* We have to use vldm / vstm for too-large modes. */
18406 if (nregs > 4)
18408 templ = "v%smia%%?\t%%0!, %%h1";
18409 ops[0] = XEXP (addr, 0);
18411 else
18413 templ = "v%s1.64\t%%h1, %%A0";
18414 ops[0] = mem;
18416 ops[1] = reg;
18417 break;
18419 case PRE_DEC:
18420 /* We have to use vldm / vstm in this case, since there is no
18421 pre-decrement form of the vld1 / vst1 instructions. */
18422 templ = "v%smdb%%?\t%%0!, %%h1";
18423 ops[0] = XEXP (addr, 0);
18424 ops[1] = reg;
18425 break;
18427 case POST_MODIFY:
18428 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18429 gcc_unreachable ();
18431 case LABEL_REF:
18432 case PLUS:
18434 int i;
18435 int overlap = -1;
18436 for (i = 0; i < nregs; i++)
18438 /* We're only using DImode here because it's a convenient size. */
18439 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18440 ops[1] = adjust_address (mem, DImode, 8 * i);
18441 if (reg_overlap_mentioned_p (ops[0], mem))
18443 gcc_assert (overlap == -1);
18444 overlap = i;
18446 else
18448 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18449 output_asm_insn (buff, ops);
18452 if (overlap != -1)
18454 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18455 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18456 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18457 output_asm_insn (buff, ops);
18460 return "";
18463 default:
18464 /* We have to use vldm / vstm for too-large modes. */
18465 if (nregs > 4)
18466 templ = "v%smia%%?\t%%m0, %%h1";
18467 else
18468 templ = "v%s1.64\t%%h1, %%A0";
18470 ops[0] = mem;
18471 ops[1] = reg;
18474 sprintf (buff, templ, load ? "ld" : "st");
18475 output_asm_insn (buff, ops);
18477 return "";
18480 /* Compute and return the length of neon_mov<mode>, where <mode> is
18481 one of VSTRUCT modes: EI, OI, CI or XI. */
18483 arm_attr_length_move_neon (rtx insn)
18485 rtx reg, mem, addr;
18486 int load;
18487 enum machine_mode mode;
18489 extract_insn_cached (insn);
18491 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18493 mode = GET_MODE (recog_data.operand[0]);
18494 switch (mode)
18496 case EImode:
18497 case OImode:
18498 return 8;
18499 case CImode:
18500 return 12;
18501 case XImode:
18502 return 16;
18503 default:
18504 gcc_unreachable ();
18508 load = REG_P (recog_data.operand[0]);
18509 reg = recog_data.operand[!load];
18510 mem = recog_data.operand[load];
18512 gcc_assert (MEM_P (mem));
18514 mode = GET_MODE (reg);
18515 addr = XEXP (mem, 0);
18517 /* Strip off const from addresses like (const (plus (...))). */
18518 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18519 addr = XEXP (addr, 0);
18521 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18523 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18524 return insns * 4;
18526 else
18527 return 4;
18530 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18531 return zero. */
18534 arm_address_offset_is_imm (rtx insn)
18536 rtx mem, addr;
18538 extract_insn_cached (insn);
18540 if (REG_P (recog_data.operand[0]))
18541 return 0;
18543 mem = recog_data.operand[0];
18545 gcc_assert (MEM_P (mem));
18547 addr = XEXP (mem, 0);
18549 if (REG_P (addr)
18550 || (GET_CODE (addr) == PLUS
18551 && REG_P (XEXP (addr, 0))
18552 && CONST_INT_P (XEXP (addr, 1))))
18553 return 1;
18554 else
18555 return 0;
18558 /* Output an ADD r, s, #n where n may be too big for one instruction.
18559	   If adding zero and the source and destination registers are the same,
	   output nothing. */
18560 const char *
18561 output_add_immediate (rtx *operands)
18563 HOST_WIDE_INT n = INTVAL (operands[2]);
18565 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18567 if (n < 0)
18568 output_multi_immediate (operands,
18569 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18570 -n);
18571 else
18572 output_multi_immediate (operands,
18573 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18577 return "";
18580 /* Output a multiple immediate operation.
18581 OPERANDS is the vector of operands referred to in the output patterns.
18582 INSTR1 is the output pattern to use for the first constant.
18583 INSTR2 is the output pattern to use for subsequent constants.
18584 IMMED_OP is the index of the constant slot in OPERANDS.
18585 N is the constant value. */
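/* Worked example (register names are illustrative): output_add_immediate
   with n = 0x10004 cannot use a single ARM immediate, so this routine
   emits "add r0, r1, #4" followed by "add r0, r0, #65536", consuming one
   shifted 8-bit chunk of the constant per instruction.  */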
18586 static const char *
18587 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18588 int immed_op, HOST_WIDE_INT n)
18590 #if HOST_BITS_PER_WIDE_INT > 32
18591 n &= 0xffffffff;
18592 #endif
18594 if (n == 0)
18596 /* Quick and easy output. */
18597 operands[immed_op] = const0_rtx;
18598 output_asm_insn (instr1, operands);
18600 else
18602 int i;
18603 const char * instr = instr1;
18605 /* Note that n is never zero here (which would give no output). */
18606 for (i = 0; i < 32; i += 2)
18608 if (n & (3 << i))
18610 operands[immed_op] = GEN_INT (n & (255 << i));
18611 output_asm_insn (instr, operands);
18612 instr = instr2;
18613 i += 6;
18618 return "";
18621 /* Return the name of a shifter operation. */
18622 static const char *
18623 arm_shift_nmem(enum rtx_code code)
18625 switch (code)
18627 case ASHIFT:
18628 return ARM_LSL_NAME;
18630 case ASHIFTRT:
18631 return "asr";
18633 case LSHIFTRT:
18634 return "lsr";
18636 case ROTATERT:
18637 return "ror";
18639 default:
18640 abort();
18644 /* Return the appropriate ARM instruction for the operation code.
18645 The returned result should not be overwritten. OP is the rtx of the
18646 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18647 was shifted. */
18648 const char *
18649 arithmetic_instr (rtx op, int shift_first_arg)
18651 switch (GET_CODE (op))
18653 case PLUS:
18654 return "add";
18656 case MINUS:
18657 return shift_first_arg ? "rsb" : "sub";
18659 case IOR:
18660 return "orr";
18662 case XOR:
18663 return "eor";
18665 case AND:
18666 return "and";
18668 case ASHIFT:
18669 case ASHIFTRT:
18670 case LSHIFTRT:
18671 case ROTATERT:
18672 return arm_shift_nmem(GET_CODE(op));
18674 default:
18675 gcc_unreachable ();
18679 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18680 for the operation code. The returned result should not be overwritten.
18681 OP is the rtx code of the shift.
18682	   On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18683	   constant shift amount otherwise. */
18684 static const char *
18685 shift_op (rtx op, HOST_WIDE_INT *amountp)
18687 const char * mnem;
18688 enum rtx_code code = GET_CODE (op);
18690 switch (code)
18692 case ROTATE:
18693 if (!CONST_INT_P (XEXP (op, 1)))
18695 output_operand_lossage ("invalid shift operand");
18696 return NULL;
18699 code = ROTATERT;
18700 *amountp = 32 - INTVAL (XEXP (op, 1));
18701 mnem = "ror";
18702 break;
18704 case ASHIFT:
18705 case ASHIFTRT:
18706 case LSHIFTRT:
18707 case ROTATERT:
18708 mnem = arm_shift_nmem(code);
18709 if (CONST_INT_P (XEXP (op, 1)))
18711 *amountp = INTVAL (XEXP (op, 1));
18713 else if (REG_P (XEXP (op, 1)))
18715 *amountp = -1;
18716 return mnem;
18718 else
18720 output_operand_lossage ("invalid shift operand");
18721 return NULL;
18723 break;
18725 case MULT:
18726 /* We never have to worry about the amount being other than a
18727 power of 2, since this case can never be reloaded from a reg. */
18728 if (!CONST_INT_P (XEXP (op, 1)))
18730 output_operand_lossage ("invalid shift operand");
18731 return NULL;
18734 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18736 /* Amount must be a power of two. */
18737 if (*amountp & (*amountp - 1))
18739 output_operand_lossage ("invalid shift operand");
18740 return NULL;
18743 *amountp = int_log2 (*amountp);
18744 return ARM_LSL_NAME;
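      /* For example, a (mult x 8) operand returns ARM_LSL_NAME with
         *AMOUNTP set to 3, so the caller prints it as "lsl #3".  */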
18746 default:
18747 output_operand_lossage ("invalid shift operand");
18748 return NULL;
18751 /* This is not 100% correct, but follows from the desire to merge
18752 multiplication by a power of 2 with the recognizer for a
18753	     shift. >=32 is not a valid shift for "lsl", so we must try to
18754 output a shift that produces the correct arithmetical result.
18755 Using lsr #32 is identical except for the fact that the carry bit
18756 is not set correctly if we set the flags; but we never use the
18757 carry bit from such an operation, so we can ignore that. */
18758 if (code == ROTATERT)
18759 /* Rotate is just modulo 32. */
18760 *amountp &= 31;
18761 else if (*amountp != (*amountp & 31))
18763 if (code == ASHIFT)
18764 mnem = "lsr";
18765 *amountp = 32;
18768 /* Shifts of 0 are no-ops. */
18769 if (*amountp == 0)
18770 return NULL;
18772 return mnem;
18775	/* Obtain the shift count (i.e. the base-2 logarithm) from the POWER of two. */
18777 static HOST_WIDE_INT
18778 int_log2 (HOST_WIDE_INT power)
18780 HOST_WIDE_INT shift = 0;
18782 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18784 gcc_assert (shift <= 31);
18785 shift++;
18788 return shift;
18791 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18792 because /bin/as is horribly restrictive. The judgement about
18793 whether or not each character is 'printable' (and can be output as
18794 is) or not (and must be printed with an octal escape) must be made
18795 with reference to the *host* character set -- the situation is
18796 similar to that discussed in the comments above pp_c_char in
18797 c-pretty-print.c. */
18799 #define MAX_ASCII_LEN 51
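/* For instance, the four bytes 'O', 'K', '"', '\n' would be emitted as
       .ascii "OK\"\012"
   with the quote escaped and the non-printable newline written as an
   octal escape.  */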
18801 void
18802 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18804 int i;
18805 int len_so_far = 0;
18807 fputs ("\t.ascii\t\"", stream);
18809 for (i = 0; i < len; i++)
18811 int c = p[i];
18813 if (len_so_far >= MAX_ASCII_LEN)
18815 fputs ("\"\n\t.ascii\t\"", stream);
18816 len_so_far = 0;
18819 if (ISPRINT (c))
18821 if (c == '\\' || c == '\"')
18823 putc ('\\', stream);
18824 len_so_far++;
18826 putc (c, stream);
18827 len_so_far++;
18829 else
18831 fprintf (stream, "\\%03o", c);
18832 len_so_far += 4;
18836 fputs ("\"\n", stream);
18839 /* Compute the register save mask for registers 0 through 12
18840 inclusive. This code is used by arm_compute_save_reg_mask. */
18842 static unsigned long
18843 arm_compute_save_reg0_reg12_mask (void)
18845 unsigned long func_type = arm_current_func_type ();
18846 unsigned long save_reg_mask = 0;
18847 unsigned int reg;
18849 if (IS_INTERRUPT (func_type))
18851 unsigned int max_reg;
18852 /* Interrupt functions must not corrupt any registers,
18853 even call clobbered ones. If this is a leaf function
18854 we can just examine the registers used by the RTL, but
18855 otherwise we have to assume that whatever function is
18856 called might clobber anything, and so we have to save
18857 all the call-clobbered registers as well. */
18858 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18859 /* FIQ handlers have registers r8 - r12 banked, so
18860	 we only need to check r0 - r7. Normal ISRs only
18861 bank r14 and r15, so we must check up to r12.
18862 r13 is the stack pointer which is always preserved,
18863 so we do not need to consider it here. */
18864 max_reg = 7;
18865 else
18866 max_reg = 12;
18868 for (reg = 0; reg <= max_reg; reg++)
18869 if (df_regs_ever_live_p (reg)
18870 || (! crtl->is_leaf && call_used_regs[reg]))
18871 save_reg_mask |= (1 << reg);
18873 /* Also save the pic base register if necessary. */
18874 if (flag_pic
18875 && !TARGET_SINGLE_PIC_BASE
18876 && arm_pic_register != INVALID_REGNUM
18877 && crtl->uses_pic_offset_table)
18878 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18880 else if (IS_VOLATILE(func_type))
18882 /* For noreturn functions we historically omitted register saves
18883 altogether. However this really messes up debugging. As a
18884 compromise save just the frame pointers. Combined with the link
18885 register saved elsewhere this should be sufficient to get
18886 a backtrace. */
18887 if (frame_pointer_needed)
18888 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18889 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18890 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18891 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18892 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18894 else
18896 /* In the normal case we only need to save those registers
18897 which are call saved and which are used by this function. */
18898 for (reg = 0; reg <= 11; reg++)
18899 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18900 save_reg_mask |= (1 << reg);
18902 /* Handle the frame pointer as a special case. */
18903 if (frame_pointer_needed)
18904 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18906 /* If we aren't loading the PIC register,
18907 don't stack it even though it may be live. */
18908 if (flag_pic
18909 && !TARGET_SINGLE_PIC_BASE
18910 && arm_pic_register != INVALID_REGNUM
18911 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18912 || crtl->uses_pic_offset_table))
18913 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18915 /* The prologue will copy SP into R0, so save it. */
18916 if (IS_STACKALIGN (func_type))
18917 save_reg_mask |= 1;
18920 /* Save registers so the exception handler can modify them. */
18921 if (crtl->calls_eh_return)
18923 unsigned int i;
18925 for (i = 0; ; i++)
18927 reg = EH_RETURN_DATA_REGNO (i);
18928 if (reg == INVALID_REGNUM)
18929 break;
18930 save_reg_mask |= 1 << reg;
18934 return save_reg_mask;
18937 /* Return true if r3 is live at the start of the function. */
18939 static bool
18940 arm_r3_live_at_start_p (void)
18942 /* Just look at cfg info, which is still close enough to correct at this
18943 point. This gives false positives for broken functions that might use
18944 uninitialized data that happens to be allocated in r3, but who cares? */
18945 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18948 /* Compute the number of bytes used to store the static chain register on the
18949 stack, above the stack frame. We need to know this accurately to get the
18950 alignment of the rest of the stack frame correct. */
18952 static int
18953 arm_compute_static_chain_stack_bytes (void)
18955 /* See the defining assertion in arm_expand_prologue. */
18956 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18957 && IS_NESTED (arm_current_func_type ())
18958 && arm_r3_live_at_start_p ()
18959 && crtl->args.pretend_args_size == 0)
18960 return 4;
18962 return 0;
18965 /* Compute a bit mask of which registers need to be
18966 saved on the stack for the current function.
18967 This is used by arm_get_frame_offsets, which may add extra registers. */
18969 static unsigned long
18970 arm_compute_save_reg_mask (void)
18972 unsigned int save_reg_mask = 0;
18973 unsigned long func_type = arm_current_func_type ();
18974 unsigned int reg;
18976 if (IS_NAKED (func_type))
18977 /* This should never really happen. */
18978 return 0;
18980 /* If we are creating a stack frame, then we must save the frame pointer,
18981 IP (which will hold the old stack pointer), LR and the PC. */
18982 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18983 save_reg_mask |=
18984 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18985 | (1 << IP_REGNUM)
18986 | (1 << LR_REGNUM)
18987 | (1 << PC_REGNUM);
18989 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18991 /* Decide if we need to save the link register.
18992 Interrupt routines have their own banked link register,
18993 so they never need to save it.
18994 Otherwise if we do not use the link register we do not need to save
18995 it. If we are pushing other registers onto the stack however, we
18996 can save an instruction in the epilogue by pushing the link register
18997 now and then popping it back into the PC. This incurs extra memory
18998 accesses though, so we only do it when optimizing for size, and only
18999 if we know that we will not need a fancy return sequence. */
19000 if (df_regs_ever_live_p (LR_REGNUM)
19001 || (save_reg_mask
19002 && optimize_size
19003 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19004 && !crtl->calls_eh_return))
19005 save_reg_mask |= 1 << LR_REGNUM;
19007 if (cfun->machine->lr_save_eliminated)
19008 save_reg_mask &= ~ (1 << LR_REGNUM);
19010 if (TARGET_REALLY_IWMMXT
19011 && ((bit_count (save_reg_mask)
19012 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19013 arm_compute_static_chain_stack_bytes())
19014 ) % 2) != 0)
19016 /* The total number of registers that are going to be pushed
19017 onto the stack is odd. We need to ensure that the stack
19018 is 64-bit aligned before we start to save iWMMXt registers,
19019 and also before we start to create locals. (A local variable
19020 might be a double or long long which we will load/store using
19021 an iWMMXt instruction). Therefore we need to push another
19022 ARM register, so that the stack will be 64-bit aligned. We
19023	 try to avoid using the arg registers (r0 - r3) as they might be
19024 used to pass values in a tail call. */
19025 for (reg = 4; reg <= 12; reg++)
19026 if ((save_reg_mask & (1 << reg)) == 0)
19027 break;
19029 if (reg <= 12)
19030 save_reg_mask |= (1 << reg);
19031 else
19033 cfun->machine->sibcall_blocked = 1;
19034 save_reg_mask |= (1 << 3);
19038 /* We may need to push an additional register for use initializing the
19039 PIC base register. */
19040 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19041 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19043 reg = thumb_find_work_register (1 << 4);
19044 if (!call_used_regs[reg])
19045 save_reg_mask |= (1 << reg);
19048 return save_reg_mask;
19052 /* Compute a bit mask of which registers need to be
19053 saved on the stack for the current function. */
19054 static unsigned long
19055 thumb1_compute_save_reg_mask (void)
19057 unsigned long mask;
19058 unsigned reg;
19060 mask = 0;
19061 for (reg = 0; reg < 12; reg ++)
19062 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19063 mask |= 1 << reg;
19065 if (flag_pic
19066 && !TARGET_SINGLE_PIC_BASE
19067 && arm_pic_register != INVALID_REGNUM
19068 && crtl->uses_pic_offset_table)
19069 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19071 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19072 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19073 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19075 /* LR will also be pushed if any lo regs are pushed. */
19076 if (mask & 0xff || thumb_force_lr_save ())
19077 mask |= (1 << LR_REGNUM);
19079 /* Make sure we have a low work register if we need one.
19080 We will need one if we are going to push a high register,
19081 but we are not currently intending to push a low register. */
19082 if ((mask & 0xff) == 0
19083 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19085 /* Use thumb_find_work_register to choose which register
19086 we will use. If the register is live then we will
19087 have to push it. Use LAST_LO_REGNUM as our fallback
19088 choice for the register to select. */
19089 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19090 /* Make sure the register returned by thumb_find_work_register is
19091 not part of the return value. */
19092 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19093 reg = LAST_LO_REGNUM;
19095 if (! call_used_regs[reg])
19096 mask |= 1 << reg;
19099 /* The 504 below is 8 bytes less than 512 because there are two possible
19100 alignment words. We can't tell here if they will be present or not so we
19101 have to play it safe and assume that they are. */
19102 if ((CALLER_INTERWORKING_SLOT_SIZE +
19103 ROUND_UP_WORD (get_frame_size ()) +
19104 crtl->outgoing_args_size) >= 504)
19106 /* This is the same as the code in thumb1_expand_prologue() which
19107 determines which register to use for stack decrement. */
19108 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19109 if (mask & (1 << reg))
19110 break;
19112 if (reg > LAST_LO_REGNUM)
19114 /* Make sure we have a register available for stack decrement. */
19115 mask |= 1 << LAST_LO_REGNUM;
19119 return mask;
19123 /* Return the number of bytes required to save VFP registers. */
19124 static int
19125 arm_get_vfp_saved_size (void)
19127 unsigned int regno;
19128 int count;
19129 int saved;
19131 saved = 0;
19132 /* Space for saved VFP registers. */
19133 if (TARGET_HARD_FLOAT && TARGET_VFP)
19135 count = 0;
19136 for (regno = FIRST_VFP_REGNUM;
19137 regno < LAST_VFP_REGNUM;
19138 regno += 2)
19140 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19141 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19143 if (count > 0)
19145 /* Workaround ARM10 VFPr1 bug. */
19146 if (count == 2 && !arm_arch6)
19147 count++;
19148 saved += count * 8;
19150 count = 0;
19152 else
19153 count++;
19155 if (count > 0)
19157 if (count == 2 && !arm_arch6)
19158 count++;
19159 saved += count * 8;
19162 return saved;
19166 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19167 everything bar the final return instruction. If simple_return is true,
19168	   then do not output the epilogue, because it has already been emitted in RTL. */
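/* Sketch of typical results (register choice is illustrative): a normal
   function that saved {r4, lr} returns with "pop {r4, pc}" under unified
   assembly, while a function with no saved registers on an ARMv5 core
   simply gets "bx lr".  */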
19169 const char *
19170 output_return_instruction (rtx operand, bool really_return, bool reverse,
19171 bool simple_return)
19173 char conditional[10];
19174 char instr[100];
19175 unsigned reg;
19176 unsigned long live_regs_mask;
19177 unsigned long func_type;
19178 arm_stack_offsets *offsets;
19180 func_type = arm_current_func_type ();
19182 if (IS_NAKED (func_type))
19183 return "";
19185 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19187 /* If this function was declared non-returning, and we have
19188 found a tail call, then we have to trust that the called
19189 function won't return. */
19190 if (really_return)
19192 rtx ops[2];
19194 /* Otherwise, trap an attempted return by aborting. */
19195 ops[0] = operand;
19196 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19197 : "abort");
19198 assemble_external_libcall (ops[1]);
19199 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19202 return "";
19205 gcc_assert (!cfun->calls_alloca || really_return);
19207 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19209 cfun->machine->return_used_this_function = 1;
19211 offsets = arm_get_frame_offsets ();
19212 live_regs_mask = offsets->saved_regs_mask;
19214 if (!simple_return && live_regs_mask)
19216 const char * return_reg;
19218 /* If we do not have any special requirements for function exit
19219 (e.g. interworking) then we can load the return address
19220 directly into the PC. Otherwise we must load it into LR. */
19221 if (really_return
19222 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19223 return_reg = reg_names[PC_REGNUM];
19224 else
19225 return_reg = reg_names[LR_REGNUM];
19227 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19229 /* There are three possible reasons for the IP register
19230 being saved. 1) a stack frame was created, in which case
19231 IP contains the old stack pointer, or 2) an ISR routine
19232 corrupted it, or 3) it was saved to align the stack on
19233 iWMMXt. In case 1, restore IP into SP, otherwise just
19234 restore IP. */
19235 if (frame_pointer_needed)
19237 live_regs_mask &= ~ (1 << IP_REGNUM);
19238 live_regs_mask |= (1 << SP_REGNUM);
19240 else
19241 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19244 /* On some ARM architectures it is faster to use LDR rather than
19245 LDM to load a single register. On other architectures, the
19246 cost is the same. In 26 bit mode, or for exception handlers,
19247 we have to use LDM to load the PC so that the CPSR is also
19248 restored. */
19249 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19250 if (live_regs_mask == (1U << reg))
19251 break;
19253 if (reg <= LAST_ARM_REGNUM
19254 && (reg != LR_REGNUM
19255 || ! really_return
19256 || ! IS_INTERRUPT (func_type)))
19258 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19259 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19261 else
19263 char *p;
19264 int first = 1;
19266 /* Generate the load multiple instruction to restore the
19267 registers. Note we can get here, even if
19268 frame_pointer_needed is true, but only if sp already
19269 points to the base of the saved core registers. */
19270 if (live_regs_mask & (1 << SP_REGNUM))
19272 unsigned HOST_WIDE_INT stack_adjust;
19274 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19275 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19277 if (stack_adjust && arm_arch5 && TARGET_ARM)
19278 if (TARGET_UNIFIED_ASM)
19279 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19280 else
19281 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19282 else
19284 /* If we can't use ldmib (SA110 bug),
19285 then try to pop r3 instead. */
19286 if (stack_adjust)
19287 live_regs_mask |= 1 << 3;
19289 if (TARGET_UNIFIED_ASM)
19290 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19291 else
19292 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19295 else
19296 if (TARGET_UNIFIED_ASM)
19297 sprintf (instr, "pop%s\t{", conditional);
19298 else
19299 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19301 p = instr + strlen (instr);
19303 for (reg = 0; reg <= SP_REGNUM; reg++)
19304 if (live_regs_mask & (1 << reg))
19306 int l = strlen (reg_names[reg]);
19308 if (first)
19309 first = 0;
19310 else
19312 memcpy (p, ", ", 2);
19313 p += 2;
19316 memcpy (p, "%|", 2);
19317 memcpy (p + 2, reg_names[reg], l);
19318 p += l + 2;
19321 if (live_regs_mask & (1 << LR_REGNUM))
19323 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19324 /* If returning from an interrupt, restore the CPSR. */
19325 if (IS_INTERRUPT (func_type))
19326 strcat (p, "^");
19328 else
19329 strcpy (p, "}");
19332 output_asm_insn (instr, & operand);
19334 /* See if we need to generate an extra instruction to
19335 perform the actual function return. */
19336 if (really_return
19337 && func_type != ARM_FT_INTERWORKED
19338 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19340 /* The return has already been handled
19341 by loading the LR into the PC. */
19342 return "";
19346 if (really_return)
19348 switch ((int) ARM_FUNC_TYPE (func_type))
19350 case ARM_FT_ISR:
19351 case ARM_FT_FIQ:
19352 /* ??? This is wrong for unified assembly syntax. */
19353 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19354 break;
19356 case ARM_FT_INTERWORKED:
19357 sprintf (instr, "bx%s\t%%|lr", conditional);
19358 break;
19360 case ARM_FT_EXCEPTION:
19361 /* ??? This is wrong for unified assembly syntax. */
19362 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19363 break;
19365 default:
19366 /* Use bx if it's available. */
19367 if (arm_arch5 || arm_arch4t)
19368 sprintf (instr, "bx%s\t%%|lr", conditional);
19369 else
19370 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19371 break;
19374 output_asm_insn (instr, & operand);
19377 return "";
19380 /* Write the function name into the code section, directly preceding
19381 the function prologue.
19383 Code will be output similar to this:
19385 .ascii "arm_poke_function_name", 0
19386 .align
19388 .word 0xff000000 + (t1 - t0)
19389 arm_poke_function_name
19390 mov ip, sp
19391 stmfd sp!, {fp, ip, lr, pc}
19392 sub fp, ip, #4
19394 When performing a stack backtrace, code can inspect the value
19395 of 'pc' stored at 'fp' + 0. If the trace function then looks
19396 at location pc - 12 and the top 8 bits are set, then we know
19397	   that there is a function name embedded immediately preceding this
19398	   location, and that its length is (pc[-3] & ~0xff000000).
19400 We assume that pc is declared as a pointer to an unsigned long.
19402 It is of no benefit to output the function name if we are assembling
19403 a leaf function. These function types will not contain a stack
19404 backtrace structure, therefore it is not possible to determine the
19405 function name. */
19406 void
19407 arm_poke_function_name (FILE *stream, const char *name)
19409 unsigned long alignlength;
19410 unsigned long length;
19411 rtx x;
19413 length = strlen (name) + 1;
19414 alignlength = ROUND_UP_WORD (length);
19416 ASM_OUTPUT_ASCII (stream, name, length);
19417 ASM_OUTPUT_ALIGN (stream, 2);
19418 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19419 assemble_aligned_integer (UNITS_PER_WORD, x);
19422 /* Place some comments into the assembler stream
19423 describing the current function. */
19424 static void
19425 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19427 unsigned long func_type;
19429 /* ??? Do we want to print some of the below anyway? */
19430 if (TARGET_THUMB1)
19431 return;
19433 /* Sanity check. */
19434 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19436 func_type = arm_current_func_type ();
19438 switch ((int) ARM_FUNC_TYPE (func_type))
19440 default:
19441 case ARM_FT_NORMAL:
19442 break;
19443 case ARM_FT_INTERWORKED:
19444 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19445 break;
19446 case ARM_FT_ISR:
19447 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19448 break;
19449 case ARM_FT_FIQ:
19450 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19451 break;
19452 case ARM_FT_EXCEPTION:
19453 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19454 break;
19457 if (IS_NAKED (func_type))
19458 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19460 if (IS_VOLATILE (func_type))
19461 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19463 if (IS_NESTED (func_type))
19464 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19465 if (IS_STACKALIGN (func_type))
19466 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19468 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19469 crtl->args.size,
19470 crtl->args.pretend_args_size, frame_size);
19472 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19473 frame_pointer_needed,
19474 cfun->machine->uses_anonymous_args);
19476 if (cfun->machine->lr_save_eliminated)
19477 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19479 if (crtl->calls_eh_return)
19480 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19484 static void
19485 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19486 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19488 arm_stack_offsets *offsets;
19490 if (TARGET_THUMB1)
19492 int regno;
19494 /* Emit any call-via-reg trampolines that are needed for v4t support
19495 of call_reg and call_value_reg type insns. */
19496 for (regno = 0; regno < LR_REGNUM; regno++)
19498 rtx label = cfun->machine->call_via[regno];
19500 if (label != NULL)
19502 switch_to_section (function_section (current_function_decl));
19503 targetm.asm_out.internal_label (asm_out_file, "L",
19504 CODE_LABEL_NUMBER (label));
19505 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19509 /* ??? Probably not safe to set this here, since it assumes that a
19510 function will be emitted as assembly immediately after we generate
19511 RTL for it. This does not happen for inline functions. */
19512 cfun->machine->return_used_this_function = 0;
19514 else /* TARGET_32BIT */
19516 /* We need to take into account any stack-frame rounding. */
19517 offsets = arm_get_frame_offsets ();
19519 gcc_assert (!use_return_insn (FALSE, NULL)
19520 || (cfun->machine->return_used_this_function != 0)
19521 || offsets->saved_regs == offsets->outgoing_args
19522 || frame_pointer_needed);
19526 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19527	   STR and STRD. If an even number of registers are being pushed, an
19528	   STRD pattern is created for each register pair. If an
19529 odd number of registers are pushed, emit an initial STR followed by
19530 as many STRD instructions as are needed. This works best when the
19531 stack is initially 64-bit aligned (the normal case), since it
19532 ensures that each STRD is also 64-bit aligned. */
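/* Illustrative example: pushing {r4, r5, r6} (an odd count) emits
       str     r4, [sp, #-12]!
       strd    r5, r6, [sp, #4]
   whereas an even mask such as {r4, r5, r6, r7} is covered entirely by
   STRD pairs.  */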
19533 static void
19534 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19536 int num_regs = 0;
19537 int i;
19538 int regno;
19539 rtx par = NULL_RTX;
19540 rtx dwarf = NULL_RTX;
19541 rtx tmp;
19542 bool first = true;
19544 num_regs = bit_count (saved_regs_mask);
19546 /* Must be at least one register to save, and can't save SP or PC. */
19547 gcc_assert (num_regs > 0 && num_regs <= 14);
19548 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19549 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19551 /* Create sequence for DWARF info. All the frame-related data for
19552 debugging is held in this wrapper. */
19553 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19555 /* Describe the stack adjustment. */
19556 tmp = gen_rtx_SET (VOIDmode,
19557 stack_pointer_rtx,
19558 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19559 RTX_FRAME_RELATED_P (tmp) = 1;
19560 XVECEXP (dwarf, 0, 0) = tmp;
19562 /* Find the first register. */
19563 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19566 i = 0;
19568 /* If there's an odd number of registers to push, start off by
19569 pushing a single register. This ensures that subsequent strd
19570 operations are dword aligned (assuming that SP was originally
19571 64-bit aligned). */
19572 if ((num_regs & 1) != 0)
19574 rtx reg, mem, insn;
19576 reg = gen_rtx_REG (SImode, regno);
19577 if (num_regs == 1)
19578 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19579 stack_pointer_rtx));
19580 else
19581 mem = gen_frame_mem (Pmode,
19582 gen_rtx_PRE_MODIFY
19583 (Pmode, stack_pointer_rtx,
19584 plus_constant (Pmode, stack_pointer_rtx,
19585 -4 * num_regs)));
19587 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19588 RTX_FRAME_RELATED_P (tmp) = 1;
19589 insn = emit_insn (tmp);
19590 RTX_FRAME_RELATED_P (insn) = 1;
19591 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19592 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19593 reg);
19594 RTX_FRAME_RELATED_P (tmp) = 1;
19595 i++;
19596 regno++;
19597 XVECEXP (dwarf, 0, i) = tmp;
19598 first = false;
19601 while (i < num_regs)
19602 if (saved_regs_mask & (1 << regno))
19604 rtx reg1, reg2, mem1, mem2;
19605 rtx tmp0, tmp1, tmp2;
19606 int regno2;
19608 /* Find the register to pair with this one. */
19609 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19610 regno2++)
19613 reg1 = gen_rtx_REG (SImode, regno);
19614 reg2 = gen_rtx_REG (SImode, regno2);
19616 if (first)
19618 rtx insn;
19620 first = false;
19621 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19622 stack_pointer_rtx,
19623 -4 * num_regs));
19624 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19625 stack_pointer_rtx,
19626 -4 * (num_regs - 1)));
19627 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19628 plus_constant (Pmode, stack_pointer_rtx,
19629 -4 * (num_regs)));
19630 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19631 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19632 RTX_FRAME_RELATED_P (tmp0) = 1;
19633 RTX_FRAME_RELATED_P (tmp1) = 1;
19634 RTX_FRAME_RELATED_P (tmp2) = 1;
19635 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19636 XVECEXP (par, 0, 0) = tmp0;
19637 XVECEXP (par, 0, 1) = tmp1;
19638 XVECEXP (par, 0, 2) = tmp2;
19639 insn = emit_insn (par);
19640 RTX_FRAME_RELATED_P (insn) = 1;
19641 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19643 else
19645 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19646 stack_pointer_rtx,
19647 4 * i));
19648 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19649 stack_pointer_rtx,
19650 4 * (i + 1)));
19651 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19652 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19653 RTX_FRAME_RELATED_P (tmp1) = 1;
19654 RTX_FRAME_RELATED_P (tmp2) = 1;
19655 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19656 XVECEXP (par, 0, 0) = tmp1;
19657 XVECEXP (par, 0, 1) = tmp2;
19658 emit_insn (par);
19661 /* Create unwind information. This is an approximation. */
19662 tmp1 = gen_rtx_SET (VOIDmode,
19663 gen_frame_mem (Pmode,
19664 plus_constant (Pmode,
19665 stack_pointer_rtx,
19666 4 * i)),
19667 reg1);
19668 tmp2 = gen_rtx_SET (VOIDmode,
19669 gen_frame_mem (Pmode,
19670 plus_constant (Pmode,
19671 stack_pointer_rtx,
19672 4 * (i + 1))),
19673 reg2);
19675 RTX_FRAME_RELATED_P (tmp1) = 1;
19676 RTX_FRAME_RELATED_P (tmp2) = 1;
19677 XVECEXP (dwarf, 0, i + 1) = tmp1;
19678 XVECEXP (dwarf, 0, i + 2) = tmp2;
19679 i += 2;
19680 regno = regno2 + 1;
19682 else
19683 regno++;
19685 return;
19688 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19689 whenever possible, otherwise it emits single-word stores. The first store
19690 also allocates stack space for all saved registers, using writeback with
19691 pre-indexed addressing. All other stores use offset addressing. If no STRD
19692 can be emitted, this function emits a sequence of single-word stores, and
19693 not an STM as before, because single-word stores give the scheduler more
19694 freedom and can be turned into an STM by peephole optimizations. */
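/* For illustration only (actual output depends on SAVED_REGS_MASK): a mask
   covering r4, r5 and r7 would be saved roughly as:

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   The first store allocates the full 12 bytes with writeback; the remaining
   stores use plain offset addressing.  */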
19695 static void
19696 arm_emit_strd_push (unsigned long saved_regs_mask)
19698 int num_regs = 0;
19699 int i, j, dwarf_index = 0;
19700 int offset = 0;
19701 rtx dwarf = NULL_RTX;
19702 rtx insn = NULL_RTX;
19703 rtx tmp, mem;
19705 /* TODO: More efficient code can be emitted by changing the
19706 layout, e.g., first push all pairs that can use STRD to keep the
19707 stack aligned, and then push all other registers. */
19708 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19709 if (saved_regs_mask & (1 << i))
19710 num_regs++;
19712 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19713 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19714 gcc_assert (num_regs > 0);
19716 /* Create sequence for DWARF info. */
19717 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19719 /* For dwarf info, we generate explicit stack update. */
19720 tmp = gen_rtx_SET (VOIDmode,
19721 stack_pointer_rtx,
19722 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19723 RTX_FRAME_RELATED_P (tmp) = 1;
19724 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19726 /* Save registers. */
19727 offset = - 4 * num_regs;
19728 j = 0;
19729 while (j <= LAST_ARM_REGNUM)
19730 if (saved_regs_mask & (1 << j))
19732 if ((j % 2 == 0)
19733 && (saved_regs_mask & (1 << (j + 1))))
19735 /* The current register and the next register form a register pair for
19736 which STRD can be generated. */
19737 if (offset < 0)
19739 /* Allocate stack space for all saved registers. */
19740 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19741 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19742 mem = gen_frame_mem (DImode, tmp);
19743 offset = 0;
19745 else if (offset > 0)
19746 mem = gen_frame_mem (DImode,
19747 plus_constant (Pmode,
19748 stack_pointer_rtx,
19749 offset));
19750 else
19751 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19753 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19754 RTX_FRAME_RELATED_P (tmp) = 1;
19755 tmp = emit_insn (tmp);
19757 /* Record the first store insn. */
19758 if (dwarf_index == 1)
19759 insn = tmp;
19761 /* Generate dwarf info. */
19762 mem = gen_frame_mem (SImode,
19763 plus_constant (Pmode,
19764 stack_pointer_rtx,
19765 offset));
19766 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19767 RTX_FRAME_RELATED_P (tmp) = 1;
19768 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19770 mem = gen_frame_mem (SImode,
19771 plus_constant (Pmode,
19772 stack_pointer_rtx,
19773 offset + 4));
19774 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19775 RTX_FRAME_RELATED_P (tmp) = 1;
19776 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19778 offset += 8;
19779 j += 2;
19781 else
19783 /* Emit a single word store. */
19784 if (offset < 0)
19786 /* Allocate stack space for all saved registers. */
19787 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19788 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19789 mem = gen_frame_mem (SImode, tmp);
19790 offset = 0;
19792 else if (offset > 0)
19793 mem = gen_frame_mem (SImode,
19794 plus_constant (Pmode,
19795 stack_pointer_rtx,
19796 offset));
19797 else
19798 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19800 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19801 RTX_FRAME_RELATED_P (tmp) = 1;
19802 tmp = emit_insn (tmp);
19804 /* Record the first store insn. */
19805 if (dwarf_index == 1)
19806 insn = tmp;
19808 /* Generate dwarf info. */
19809 mem = gen_frame_mem (SImode,
19810 plus_constant (Pmode,
19811 stack_pointer_rtx,
19812 offset));
19813 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19814 RTX_FRAME_RELATED_P (tmp) = 1;
19815 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19817 offset += 4;
19818 j += 1;
19821 else
19822 j++;
19824 /* Attach dwarf info to the first insn we generate. */
19825 gcc_assert (insn != NULL_RTX);
19826 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19827 RTX_FRAME_RELATED_P (insn) = 1;
19830 /* Generate and emit an insn that we will recognize as a push_multi.
19831 Unfortunately, since this insn does not reflect very well the actual
19832 semantics of the operation, we need to annotate the insn for the benefit
19833 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19834 MASK for registers that should be annotated for DWARF2 frame unwind
19835 information. */
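/* For example (illustrative; the exact mnemonic depends on the assembler
   syntax selected), calling this with MASK and DWARF_REGS_MASK both equal to
   (1 << 4) | (1 << 5) | (1 << LR_REGNUM) produces an insn that is output as
   something like

	push	{r4, r5, lr}

   while the attached REG_FRAME_RELATED_EXPR note describes a single 12-byte
   decrement of SP followed by the three individual stores.  */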
19836 static rtx
19837 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19839 int num_regs = 0;
19840 int num_dwarf_regs = 0;
19841 int i, j;
19842 rtx par;
19843 rtx dwarf;
19844 int dwarf_par_index;
19845 rtx tmp, reg;
19847 /* We don't record the PC in the dwarf frame information. */
19848 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19850 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19852 if (mask & (1 << i))
19853 num_regs++;
19854 if (dwarf_regs_mask & (1 << i))
19855 num_dwarf_regs++;
19858 gcc_assert (num_regs && num_regs <= 16);
19859 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19861 /* For the body of the insn we are going to generate an UNSPEC in
19862 parallel with several USEs. This allows the insn to be recognized
19863 by the push_multi pattern in the arm.md file.
19865 The body of the insn looks something like this:
19867 (parallel [
19868 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19869 (const_int:SI <num>)))
19870 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19871 (use (reg:SI XX))
19872 (use (reg:SI YY))
19876 For the frame note however, we try to be more explicit and actually
19877 show each register being stored into the stack frame, plus a (single)
19878 decrement of the stack pointer. We do it this way in order to be
19879 friendly to the stack unwinding code, which only wants to see a single
19880 stack decrement per instruction. The RTL we generate for the note looks
19881 something like this:
19883 (sequence [
19884 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19885 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19886 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19887 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19891 FIXME: In an ideal world the PRE_MODIFY would not exist and
19892 instead we'd have a parallel expression detailing all
19893 the stores to the various memory addresses so that debug
19894 information is more up-to-date. Remember however while writing
19895 this to take care of the constraints with the push instruction.
19897 Note also that this has to be taken care of for the VFP registers.
19899 For more see PR43399. */
19901 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19902 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19903 dwarf_par_index = 1;
19905 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19907 if (mask & (1 << i))
19909 reg = gen_rtx_REG (SImode, i);
19911 XVECEXP (par, 0, 0)
19912 = gen_rtx_SET (VOIDmode,
19913 gen_frame_mem
19914 (BLKmode,
19915 gen_rtx_PRE_MODIFY (Pmode,
19916 stack_pointer_rtx,
19917 plus_constant
19918 (Pmode, stack_pointer_rtx,
19919 -4 * num_regs))
19921 gen_rtx_UNSPEC (BLKmode,
19922 gen_rtvec (1, reg),
19923 UNSPEC_PUSH_MULT));
19925 if (dwarf_regs_mask & (1 << i))
19927 tmp = gen_rtx_SET (VOIDmode,
19928 gen_frame_mem (SImode, stack_pointer_rtx),
19929 reg);
19930 RTX_FRAME_RELATED_P (tmp) = 1;
19931 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19934 break;
19938 for (j = 1, i++; j < num_regs; i++)
19940 if (mask & (1 << i))
19942 reg = gen_rtx_REG (SImode, i);
19944 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19946 if (dwarf_regs_mask & (1 << i))
19949 tmp = gen_rtx_SET (VOIDmode,
19950 gen_frame_mem
19951 (SImode,
19952 plus_constant (Pmode, stack_pointer_rtx,
19953 4 * j)),
19954 reg);
19955 RTX_FRAME_RELATED_P (tmp) = 1;
19956 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19959 j++;
19963 par = emit_insn (par);
19965 tmp = gen_rtx_SET (VOIDmode,
19966 stack_pointer_rtx,
19967 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19968 RTX_FRAME_RELATED_P (tmp) = 1;
19969 XVECEXP (dwarf, 0, 0) = tmp;
19971 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19973 return par;
19976 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19977 SIZE is the offset to be adjusted.
19978 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19979 static void
19980 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19982 rtx dwarf;
19984 RTX_FRAME_RELATED_P (insn) = 1;
19985 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19986 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19989 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19990 SAVED_REGS_MASK shows which registers need to be restored.
19992 Unfortunately, since this insn does not reflect very well the actual
19993 semantics of the operation, we need to annotate the insn for the benefit
19994 of DWARF2 frame unwind information. */
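/* Illustrative example (not exact output): with SAVED_REGS_MASK covering
   r4, r5 and PC, the emitted parallel contains a return, an SP increment of
   12 bytes and the three loads, and is assembled as something like

	pop	{r4, r5, pc}

   When PC is not in the mask, a REG_CFA_ADJUST_CFA note is added instead so
   that the unwinder sees the stack adjustment.  */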
19995 static void
19996 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
19998 int num_regs = 0;
19999 int i, j;
20000 rtx par;
20001 rtx dwarf = NULL_RTX;
20002 rtx tmp, reg;
20003 bool return_in_pc;
20004 int offset_adj;
20005 int emit_update;
20007 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20008 offset_adj = return_in_pc ? 1 : 0;
20009 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20010 if (saved_regs_mask & (1 << i))
20011 num_regs++;
20013 gcc_assert (num_regs && num_regs <= 16);
20015 /* If SP is in reglist, then we don't emit SP update insn. */
20016 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20018 /* The parallel needs to hold num_regs SETs
20019 and one SET for the stack update. */
20020 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20022 if (return_in_pc)
20024 tmp = ret_rtx;
20025 XVECEXP (par, 0, 0) = tmp;
20028 if (emit_update)
20030 /* Increment the stack pointer, based on there being
20031 num_regs 4-byte registers to restore. */
20032 tmp = gen_rtx_SET (VOIDmode,
20033 stack_pointer_rtx,
20034 plus_constant (Pmode,
20035 stack_pointer_rtx,
20036 4 * num_regs));
20037 RTX_FRAME_RELATED_P (tmp) = 1;
20038 XVECEXP (par, 0, offset_adj) = tmp;
20041 /* Now restore every reg, which may include PC. */
20042 for (j = 0, i = 0; j < num_regs; i++)
20043 if (saved_regs_mask & (1 << i))
20045 reg = gen_rtx_REG (SImode, i);
20046 if ((num_regs == 1) && emit_update && !return_in_pc)
20048 /* Emit single load with writeback. */
20049 tmp = gen_frame_mem (SImode,
20050 gen_rtx_POST_INC (Pmode,
20051 stack_pointer_rtx));
20052 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20053 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20054 return;
20057 tmp = gen_rtx_SET (VOIDmode,
20058 reg,
20059 gen_frame_mem
20060 (SImode,
20061 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20062 RTX_FRAME_RELATED_P (tmp) = 1;
20063 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20065 /* We need to maintain a sequence for DWARF info too. As dwarf info
20066 should not have PC, skip PC. */
20067 if (i != PC_REGNUM)
20068 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20070 j++;
20073 if (return_in_pc)
20074 par = emit_jump_insn (par);
20075 else
20076 par = emit_insn (par);
20078 REG_NOTES (par) = dwarf;
20079 if (!return_in_pc)
20080 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20081 stack_pointer_rtx, stack_pointer_rtx);
20084 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20085 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20087 Unfortunately, since this insn does not reflect very well the actual
20088 semantics of the operation, we need to annotate the insn for the benefit
20089 of DWARF2 frame unwind information. */
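/* Illustrative example only: restoring four D registers starting at d8 from
   the stack pointer ends up assembled as something like

	vldmia	sp!, {d8-d11}

   with a REG_CFA_RESTORE note for each register and the 32-byte adjustment
   of BASE_REG folded into the same insn.  */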
20090 static void
20091 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20093 int i, j;
20094 rtx par;
20095 rtx dwarf = NULL_RTX;
20096 rtx tmp, reg;
20098 gcc_assert (num_regs && num_regs <= 32);
20100 /* Workaround ARM10 VFPr1 bug. */
20101 if (num_regs == 2 && !arm_arch6)
20103 if (first_reg == 15)
20104 first_reg--;
20106 num_regs++;
20109 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20110 there could be up to 32 D-registers to restore.
20111 If there are more than 16 D-registers, make two recursive calls,
20112 each of which emits one pop_multi instruction. */
20113 if (num_regs > 16)
20115 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20116 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20117 return;
20120 /* The parallel needs to hold num_regs SETs
20121 and one SET for the stack update. */
20122 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20124 /* Increment the stack pointer, based on there being
20125 num_regs 8-byte registers to restore. */
20126 tmp = gen_rtx_SET (VOIDmode,
20127 base_reg,
20128 plus_constant (Pmode, base_reg, 8 * num_regs));
20129 RTX_FRAME_RELATED_P (tmp) = 1;
20130 XVECEXP (par, 0, 0) = tmp;
20132 /* Now show every reg that will be restored, using a SET for each. */
20133 for (j = 0, i=first_reg; j < num_regs; i += 2)
20135 reg = gen_rtx_REG (DFmode, i);
20137 tmp = gen_rtx_SET (VOIDmode,
20138 reg,
20139 gen_frame_mem
20140 (DFmode,
20141 plus_constant (Pmode, base_reg, 8 * j)));
20142 RTX_FRAME_RELATED_P (tmp) = 1;
20143 XVECEXP (par, 0, j + 1) = tmp;
20145 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20147 j++;
20150 par = emit_insn (par);
20151 REG_NOTES (par) = dwarf;
20153 /* Make sure the CFA is not left based on IP_REGNUM, to allow unwinding from FP. */
20154 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20156 RTX_FRAME_RELATED_P (par) = 1;
20157 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20159 else
20160 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20161 base_reg, base_reg);
20164 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20165 even number of registers is being popped, an LDRD pattern is created for
20166 each register pair. If an odd number of registers is popped, the last
20167 register is loaded using an LDR pattern. */
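/* Sketch of the intended output (illustrative; depends on SAVED_REGS_MASK):
   popping r4, r5 and r6 is emitted roughly as

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   i.e. register pairs are loaded with offset addressing, the stack is then
   adjusted, and any odd remaining register uses LDR with post-increment.  */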
20168 static void
20169 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20171 int num_regs = 0;
20172 int i, j;
20173 rtx par = NULL_RTX;
20174 rtx dwarf = NULL_RTX;
20175 rtx tmp, reg, tmp1;
20176 bool return_in_pc;
20178 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20179 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20180 if (saved_regs_mask & (1 << i))
20181 num_regs++;
20183 gcc_assert (num_regs && num_regs <= 16);
20185 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20186 to be popped. So, if num_regs is even, now it will become odd,
20187 and we can generate pop with PC. If num_regs is odd, it will be
20188 even now, and ldr with return can be generated for PC. */
20189 if (return_in_pc)
20190 num_regs--;
20192 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20194 /* Var j iterates over all the registers in saved_regs_mask; var i gives
20195 the index of each saved register in the stack frame. A PARALLEL RTX
20196 for a register pair is created here so that the pattern for LDRD can
20197 be matched. As PC is always the last register to be popped, and we
20198 have already decremented num_regs if PC is set, we don't have to worry
20199 about PC in this loop. */
20200 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20201 if (saved_regs_mask & (1 << j))
20203 /* Create RTX for memory load. */
20204 reg = gen_rtx_REG (SImode, j);
20205 tmp = gen_rtx_SET (SImode,
20206 reg,
20207 gen_frame_mem (SImode,
20208 plus_constant (Pmode,
20209 stack_pointer_rtx, 4 * i)));
20210 RTX_FRAME_RELATED_P (tmp) = 1;
20212 if (i % 2 == 0)
20214 /* When saved-register index (i) is even, the RTX to be emitted is
20215 yet to be created. Hence create it first. The LDRD pattern we
20216 are generating is :
20217 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20218 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20219 where target registers need not be consecutive. */
20220 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20221 dwarf = NULL_RTX;
20224 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20225 added as 0th element and if i is odd, reg_i is added as 1st element
20226 of LDRD pattern shown above. */
20227 XVECEXP (par, 0, (i % 2)) = tmp;
20228 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20230 if ((i % 2) == 1)
20232 /* When saved-register index (i) is odd, RTXs for both the registers
20233 to be loaded are generated in above given LDRD pattern, and the
20234 pattern can be emitted now. */
20235 par = emit_insn (par);
20236 REG_NOTES (par) = dwarf;
20237 RTX_FRAME_RELATED_P (par) = 1;
20240 i++;
20243 /* If the number of registers to be popped is odd and return_in_pc is false,
20244 or the number of registers is even and return_in_pc is true, the last
20245 register is popped using LDR. It can be PC as well. Hence, adjust the
20246 stack first and then use LDR with post-increment. */
20248 /* Increment the stack pointer, based on there being
20249 num_regs 4-byte registers to restore. */
20250 tmp = gen_rtx_SET (VOIDmode,
20251 stack_pointer_rtx,
20252 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20253 RTX_FRAME_RELATED_P (tmp) = 1;
20254 tmp = emit_insn (tmp);
20255 if (!return_in_pc)
20257 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20258 stack_pointer_rtx, stack_pointer_rtx);
20261 dwarf = NULL_RTX;
20263 if (((num_regs % 2) == 1 && !return_in_pc)
20264 || ((num_regs % 2) == 0 && return_in_pc))
20266 /* Scan for the single register to be popped. Skip until the saved
20267 register is found. */
20268 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20270 /* Gen LDR with post increment here. */
20271 tmp1 = gen_rtx_MEM (SImode,
20272 gen_rtx_POST_INC (SImode,
20273 stack_pointer_rtx));
20274 set_mem_alias_set (tmp1, get_frame_alias_set ());
20276 reg = gen_rtx_REG (SImode, j);
20277 tmp = gen_rtx_SET (SImode, reg, tmp1);
20278 RTX_FRAME_RELATED_P (tmp) = 1;
20279 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20281 if (return_in_pc)
20283 /* If return_in_pc, j must be PC_REGNUM. */
20284 gcc_assert (j == PC_REGNUM);
20285 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20286 XVECEXP (par, 0, 0) = ret_rtx;
20287 XVECEXP (par, 0, 1) = tmp;
20288 par = emit_jump_insn (par);
20290 else
20292 par = emit_insn (tmp);
20293 REG_NOTES (par) = dwarf;
20294 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20295 stack_pointer_rtx, stack_pointer_rtx);
20299 else if ((num_regs % 2) == 1 && return_in_pc)
20301 /* There are 2 registers to be popped. So, generate the pattern
20302 pop_multiple_with_stack_update_and_return to pop in PC. */
20303 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20306 return;
20309 /* LDRD in ARM mode needs consecutive registers as operands. This function
20310 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20311 offset addressing and then generates one separate stack update. This provides
20312 more scheduling freedom, compared to writeback on every load. However,
20313 if the function returns using load into PC directly
20314 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20315 before the last load. TODO: Add a peephole optimization to recognize
20316 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20317 peephole optimization to merge the load at stack-offset zero
20318 with the stack update instruction using load with writeback
20319 in post-index addressing mode. */
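/* Illustrative sketch only (the exact code depends on SAVED_REGS_MASK):
   popping r4, r5 and r6 without returning through PC is emitted roughly as

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   with a single stack update at the end rather than writeback on each
   load.  */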
20320 static void
20321 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20323 int j = 0;
20324 int offset = 0;
20325 rtx par = NULL_RTX;
20326 rtx dwarf = NULL_RTX;
20327 rtx tmp, mem;
20329 /* Restore saved registers. */
20330 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20331 j = 0;
20332 while (j <= LAST_ARM_REGNUM)
20333 if (saved_regs_mask & (1 << j))
20335 if ((j % 2) == 0
20336 && (saved_regs_mask & (1 << (j + 1)))
20337 && (j + 1) != PC_REGNUM)
20339 /* Current register and next register form register pair for which
20340 LDRD can be generated. PC is always the last register popped, and
20341 we handle it separately. */
20342 if (offset > 0)
20343 mem = gen_frame_mem (DImode,
20344 plus_constant (Pmode,
20345 stack_pointer_rtx,
20346 offset));
20347 else
20348 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20350 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20351 tmp = emit_insn (tmp);
20352 RTX_FRAME_RELATED_P (tmp) = 1;
20354 /* Generate dwarf info. */
20356 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20357 gen_rtx_REG (SImode, j),
20358 NULL_RTX);
20359 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20360 gen_rtx_REG (SImode, j + 1),
20361 dwarf);
20363 REG_NOTES (tmp) = dwarf;
20365 offset += 8;
20366 j += 2;
20368 else if (j != PC_REGNUM)
20370 /* Emit a single word load. */
20371 if (offset > 0)
20372 mem = gen_frame_mem (SImode,
20373 plus_constant (Pmode,
20374 stack_pointer_rtx,
20375 offset));
20376 else
20377 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20379 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20380 tmp = emit_insn (tmp);
20381 RTX_FRAME_RELATED_P (tmp) = 1;
20383 /* Generate dwarf info. */
20384 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20385 gen_rtx_REG (SImode, j),
20386 NULL_RTX);
20388 offset += 4;
20389 j += 1;
20391 else /* j == PC_REGNUM */
20392 j++;
20394 else
20395 j++;
20397 /* Update the stack. */
20398 if (offset > 0)
20400 tmp = gen_rtx_SET (Pmode,
20401 stack_pointer_rtx,
20402 plus_constant (Pmode,
20403 stack_pointer_rtx,
20404 offset));
20405 tmp = emit_insn (tmp);
20406 arm_add_cfa_adjust_cfa_note (tmp, offset,
20407 stack_pointer_rtx, stack_pointer_rtx);
20408 offset = 0;
20411 if (saved_regs_mask & (1 << PC_REGNUM))
20413 /* Only PC is to be popped. */
20414 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20415 XVECEXP (par, 0, 0) = ret_rtx;
20416 tmp = gen_rtx_SET (SImode,
20417 gen_rtx_REG (SImode, PC_REGNUM),
20418 gen_frame_mem (SImode,
20419 gen_rtx_POST_INC (SImode,
20420 stack_pointer_rtx)));
20421 RTX_FRAME_RELATED_P (tmp) = 1;
20422 XVECEXP (par, 0, 1) = tmp;
20423 par = emit_jump_insn (par);
20425 /* Generate dwarf info. */
20426 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20427 gen_rtx_REG (SImode, PC_REGNUM),
20428 NULL_RTX);
20429 REG_NOTES (par) = dwarf;
20430 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20431 stack_pointer_rtx, stack_pointer_rtx);
20435 /* Calculate the size of the return value that is passed in registers. */
20436 static unsigned
20437 arm_size_return_regs (void)
20439 enum machine_mode mode;
20441 if (crtl->return_rtx != 0)
20442 mode = GET_MODE (crtl->return_rtx);
20443 else
20444 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20446 return GET_MODE_SIZE (mode);
20449 /* Return true if the current function needs to save/restore LR. */
20450 static bool
20451 thumb_force_lr_save (void)
20453 return !cfun->machine->lr_save_eliminated
20454 && (!leaf_function_p ()
20455 || thumb_far_jump_used_p ()
20456 || df_regs_ever_live_p (LR_REGNUM));
20459 /* We do not know whether r3 will be available, because
20460 an indirect tail call may be happening in this
20461 particular case. */
20462 static bool
20463 is_indirect_tailcall_p (rtx call)
20465 rtx pat = PATTERN (call);
20467 /* Indirect tail call. */
20468 pat = XVECEXP (pat, 0, 0);
20469 if (GET_CODE (pat) == SET)
20470 pat = SET_SRC (pat);
20472 pat = XEXP (XEXP (pat, 0), 0);
20473 return REG_P (pat);
20476 /* Return true if r3 is used by any of the tail call insns in the
20477 current function. */
20478 static bool
20479 any_sibcall_could_use_r3 (void)
20481 edge_iterator ei;
20482 edge e;
20484 if (!crtl->tail_call_emit)
20485 return false;
20486 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20487 if (e->flags & EDGE_SIBCALL)
20489 rtx call = BB_END (e->src);
20490 if (!CALL_P (call))
20491 call = prev_nonnote_nondebug_insn (call);
20492 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20493 if (find_regno_fusage (call, USE, 3)
20494 || is_indirect_tailcall_p (call))
20495 return true;
20497 return false;
20501 /* Compute the distance from register FROM to register TO.
20502 These can be the arg pointer (26), the soft frame pointer (25),
20503 the stack pointer (13) or the hard frame pointer (11).
20504 In thumb mode r7 is used as the soft frame pointer, if needed.
20505 Typical stack layout looks like this:
20507 old stack pointer -> | |
20508 ----
20509 | | \
20510 | | saved arguments for
20511 | | vararg functions
20512 | | /
20514 hard FP & arg pointer -> | | \
20515 | | stack
20516 | | frame
20517 | | /
20519 | | \
20520 | | call saved
20521 | | registers
20522 soft frame pointer -> | | /
20524 | | \
20525 | | local
20526 | | variables
20527 locals base pointer -> | | /
20529 | | \
20530 | | outgoing
20531 | | arguments
20532 current stack pointer -> | | /
20535 For a given function some or all of these stack components
20536 may not be needed, giving rise to the possibility of
20537 eliminating some of the registers.
20539 The values returned by this function must reflect the behavior
20540 of arm_expand_prologue() and arm_compute_save_reg_mask().
20542 The sign of the number returned reflects the direction of stack
20543 growth, so the values are positive for all eliminations except
20544 from the soft frame pointer to the hard frame pointer.
20546 SFP may point just inside the local variables block to ensure correct
20547 alignment. */
20550 /* Calculate stack offsets. These are used to calculate register elimination
20551 offsets and in prologue/epilogue code. Also calculates which registers
20552 should be saved. */
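/* A rough sketch of how the fields computed below relate (derived from the
   code that follows, not a separate specification; padding for doubleword
   alignment may be added on top of each value):

     saved_args    = pretend argument bytes
     frame         = saved_args + static chain bytes + (FP slot if needed)
     saved_regs    = saved_args + static chain bytes + saved register bytes
     soft_frame    = saved_regs + CALLER_INTERWORKING_SLOT_SIZE
     locals_base   = soft_frame + local variable bytes
     outgoing_args = locals_base + outgoing argument bytes  */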
20554 static arm_stack_offsets *
20555 arm_get_frame_offsets (void)
20557 struct arm_stack_offsets *offsets;
20558 unsigned long func_type;
20559 int leaf;
20560 int saved;
20561 int core_saved;
20562 HOST_WIDE_INT frame_size;
20563 int i;
20565 offsets = &cfun->machine->stack_offsets;
20567 /* We need to know if we are a leaf function. Unfortunately, it
20568 is possible to be called after start_sequence has been called,
20569 which causes get_insns to return the insns for the sequence,
20570 not the function, which will cause leaf_function_p to return
20571 the incorrect result. We only need
20573 to know about leaf functions once reload has completed, and the
20574 frame size cannot be changed after that time, so we can safely
20575 use the cached value. */
20577 if (reload_completed)
20578 return offsets;
20580 /* Initially this is the size of the local variables. It will be translated
20581 into an offset once we have determined the size of preceding data. */
20582 frame_size = ROUND_UP_WORD (get_frame_size ());
20584 leaf = leaf_function_p ();
20586 /* Space for variadic functions. */
20587 offsets->saved_args = crtl->args.pretend_args_size;
20589 /* In Thumb mode this is incorrect, but never used. */
20590 offsets->frame
20591 = (offsets->saved_args
20592 + arm_compute_static_chain_stack_bytes ()
20593 + (frame_pointer_needed ? 4 : 0));
20595 if (TARGET_32BIT)
20597 unsigned int regno;
20599 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20600 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20601 saved = core_saved;
20603 /* We know that SP will be doubleword aligned on entry, and we must
20604 preserve that condition at any subroutine call. We also require the
20605 soft frame pointer to be doubleword aligned. */
20607 if (TARGET_REALLY_IWMMXT)
20609 /* Check for the call-saved iWMMXt registers. */
20610 for (regno = FIRST_IWMMXT_REGNUM;
20611 regno <= LAST_IWMMXT_REGNUM;
20612 regno++)
20613 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20614 saved += 8;
20617 func_type = arm_current_func_type ();
20618 /* Space for saved VFP registers. */
20619 if (! IS_VOLATILE (func_type)
20620 && TARGET_HARD_FLOAT && TARGET_VFP)
20621 saved += arm_get_vfp_saved_size ();
20623 else /* TARGET_THUMB1 */
20625 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20626 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20627 saved = core_saved;
20628 if (TARGET_BACKTRACE)
20629 saved += 16;
20632 /* Saved registers include the stack frame. */
20633 offsets->saved_regs
20634 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20635 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20637 /* A leaf function does not need any stack alignment if it has nothing
20638 on the stack. */
20639 if (leaf && frame_size == 0
20640 /* However if it calls alloca(), we have a dynamically allocated
20641 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20642 && ! cfun->calls_alloca)
20644 offsets->outgoing_args = offsets->soft_frame;
20645 offsets->locals_base = offsets->soft_frame;
20646 return offsets;
20649 /* Ensure SFP has the correct alignment. */
20650 if (ARM_DOUBLEWORD_ALIGN
20651 && (offsets->soft_frame & 7))
20653 offsets->soft_frame += 4;
20654 /* Try to align stack by pushing an extra reg. Don't bother doing this
20655 when there is a stack frame as the alignment will be rolled into
20656 the normal stack adjustment. */
20657 if (frame_size + crtl->outgoing_args_size == 0)
20659 int reg = -1;
20661 /* If it is safe to use r3, then do so. This sometimes
20662 generates better code on Thumb-2 by avoiding the need to
20663 use 32-bit push/pop instructions. */
20664 if (! any_sibcall_could_use_r3 ()
20665 && arm_size_return_regs () <= 12
20666 && (offsets->saved_regs_mask & (1 << 3)) == 0
20667 && (TARGET_THUMB2
20668 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20670 reg = 3;
20672 else
20673 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20675 /* Avoid fixed registers; they may be changed at
20676 arbitrary times so it's unsafe to restore them
20677 during the epilogue. */
20678 if (!fixed_regs[i]
20679 && (offsets->saved_regs_mask & (1 << i)) == 0)
20681 reg = i;
20682 break;
20686 if (reg != -1)
20688 offsets->saved_regs += 4;
20689 offsets->saved_regs_mask |= (1 << reg);
20694 offsets->locals_base = offsets->soft_frame + frame_size;
20695 offsets->outgoing_args = (offsets->locals_base
20696 + crtl->outgoing_args_size);
20698 if (ARM_DOUBLEWORD_ALIGN)
20700 /* Ensure SP remains doubleword aligned. */
20701 if (offsets->outgoing_args & 7)
20702 offsets->outgoing_args += 4;
20703 gcc_assert (!(offsets->outgoing_args & 7));
20706 return offsets;
20710 /* Calculate the relative offsets for the different stack pointers. Positive
20711 offsets are in the direction of stack growth. */
20713 HOST_WIDE_INT
20714 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20716 arm_stack_offsets *offsets;
20718 offsets = arm_get_frame_offsets ();
20720 /* OK, now we have enough information to compute the distances.
20721 There must be an entry in these switch tables for each pair
20722 of registers in ELIMINABLE_REGS, even if some of the entries
20723 seem to be redundant or useless. */
20724 switch (from)
20726 case ARG_POINTER_REGNUM:
20727 switch (to)
20729 case THUMB_HARD_FRAME_POINTER_REGNUM:
20730 return 0;
20732 case FRAME_POINTER_REGNUM:
20733 /* This is the reverse of the soft frame pointer
20734 to hard frame pointer elimination below. */
20735 return offsets->soft_frame - offsets->saved_args;
20737 case ARM_HARD_FRAME_POINTER_REGNUM:
20738 /* This is only non-zero in the case where the static chain register
20739 is stored above the frame. */
20740 return offsets->frame - offsets->saved_args - 4;
20742 case STACK_POINTER_REGNUM:
20743 /* If nothing has been pushed on the stack at all
20744 then this will return -4. This *is* correct! */
20745 return offsets->outgoing_args - (offsets->saved_args + 4);
20747 default:
20748 gcc_unreachable ();
20750 gcc_unreachable ();
20752 case FRAME_POINTER_REGNUM:
20753 switch (to)
20755 case THUMB_HARD_FRAME_POINTER_REGNUM:
20756 return 0;
20758 case ARM_HARD_FRAME_POINTER_REGNUM:
20759 /* The hard frame pointer points to the top entry in the
20760 stack frame. The soft frame pointer to the bottom entry
20761 in the stack frame. If there is no stack frame at all,
20762 then they are identical. */
20764 return offsets->frame - offsets->soft_frame;
20766 case STACK_POINTER_REGNUM:
20767 return offsets->outgoing_args - offsets->soft_frame;
20769 default:
20770 gcc_unreachable ();
20772 gcc_unreachable ();
20774 default:
20775 /* You cannot eliminate from the stack pointer.
20776 In theory you could eliminate from the hard frame
20777 pointer to the stack pointer, but this will never
20778 happen, since if a stack frame is not needed the
20779 hard frame pointer will never be used. */
20780 gcc_unreachable ();
20784 /* Given FROM and TO register numbers, say whether this elimination is
20785 allowed. Frame pointer elimination is automatically handled.
20787 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20788 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20789 pointer, we must eliminate FRAME_POINTER_REGNUM into
20790 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20791 ARG_POINTER_REGNUM. */
20793 bool
20794 arm_can_eliminate (const int from, const int to)
20796 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20797 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20798 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20799 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20800 true);
20803 /* Emit RTL to save coprocessor registers on function entry. Returns the
20804 number of bytes pushed. */
20806 static int
20807 arm_save_coproc_regs(void)
20809 int saved_size = 0;
20810 unsigned reg;
20811 unsigned start_reg;
20812 rtx insn;
20814 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20815 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20817 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20818 insn = gen_rtx_MEM (V2SImode, insn);
20819 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20820 RTX_FRAME_RELATED_P (insn) = 1;
20821 saved_size += 8;
20824 if (TARGET_HARD_FLOAT && TARGET_VFP)
20826 start_reg = FIRST_VFP_REGNUM;
20828 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20830 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20831 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20833 if (start_reg != reg)
20834 saved_size += vfp_emit_fstmd (start_reg,
20835 (reg - start_reg) / 2);
20836 start_reg = reg + 2;
20839 if (start_reg != reg)
20840 saved_size += vfp_emit_fstmd (start_reg,
20841 (reg - start_reg) / 2);
20843 return saved_size;
20847 /* Set the Thumb frame pointer from the stack pointer. */
20849 static void
20850 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20852 HOST_WIDE_INT amount;
20853 rtx insn, dwarf;
20855 amount = offsets->outgoing_args - offsets->locals_base;
20856 if (amount < 1024)
20857 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20858 stack_pointer_rtx, GEN_INT (amount)));
20859 else
20861 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20862 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20863 expects the first two operands to be the same. */
20864 if (TARGET_THUMB2)
20866 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20867 stack_pointer_rtx,
20868 hard_frame_pointer_rtx));
20870 else
20872 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20873 hard_frame_pointer_rtx,
20874 stack_pointer_rtx));
20876 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20877 plus_constant (Pmode, stack_pointer_rtx, amount));
20878 RTX_FRAME_RELATED_P (dwarf) = 1;
20879 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20882 RTX_FRAME_RELATED_P (insn) = 1;
20885 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20886 function. */
20887 void
20888 arm_expand_prologue (void)
20890 rtx amount;
20891 rtx insn;
20892 rtx ip_rtx;
20893 unsigned long live_regs_mask;
20894 unsigned long func_type;
20895 int fp_offset = 0;
20896 int saved_pretend_args = 0;
20897 int saved_regs = 0;
20898 unsigned HOST_WIDE_INT args_to_push;
20899 arm_stack_offsets *offsets;
20901 func_type = arm_current_func_type ();
20903 /* Naked functions don't have prologues. */
20904 if (IS_NAKED (func_type))
20905 return;
20907 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
20908 args_to_push = crtl->args.pretend_args_size;
20910 /* Compute which registers we will have to save onto the stack. */
20911 offsets = arm_get_frame_offsets ();
20912 live_regs_mask = offsets->saved_regs_mask;
20914 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20916 if (IS_STACKALIGN (func_type))
20918 rtx r0, r1;
20920 /* Handle a word-aligned stack pointer. We generate the following:
20922 mov r0, sp
20923 bic r1, r0, #7
20924 mov sp, r1
20925 <save and restore r0 in normal prologue/epilogue>
20926 mov sp, r0
20927 bx lr
20929 The unwinder doesn't need to know about the stack realignment.
20930 Just tell it we saved SP in r0. */
20931 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20933 r0 = gen_rtx_REG (SImode, 0);
20934 r1 = gen_rtx_REG (SImode, 1);
20936 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20937 RTX_FRAME_RELATED_P (insn) = 1;
20938 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20940 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20942 /* ??? The CFA changes here, which may cause GDB to conclude that it
20943 has entered a different function. That said, the unwind info is
20944 correct, individually, before and after this instruction because
20945 we've described the save of SP, which will override the default
20946 handling of SP as restoring from the CFA. */
20947 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20950 /* For APCS frames, if the IP register is clobbered when
20951 creating the frame, save that register in a special
20952 way. */
20953 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20955 if (IS_INTERRUPT (func_type))
20957 /* Interrupt functions must not corrupt any registers.
20958 Creating a frame pointer however, corrupts the IP
20959 register, so we must push it first. */
20960 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
20962 /* Do not set RTX_FRAME_RELATED_P on this insn.
20963 The dwarf stack unwinding code only wants to see one
20964 stack decrement per function, and this is not it. If
20965 this instruction is labeled as being part of the frame
20966 creation sequence then dwarf2out_frame_debug_expr will
20967 die when it encounters the assignment of IP to FP
20968 later on, since the use of SP here establishes SP as
20969 the CFA register and not IP.
20971 Anyway this instruction is not really part of the stack
20972 frame creation although it is part of the prologue. */
20974 else if (IS_NESTED (func_type))
20976 /* The static chain register is the same as the IP register
20977 used as a scratch register during stack frame creation.
20978 To get around this we need to find somewhere to store IP
20979 whilst the frame is being created. We try the following
20980 places in order:
20982 1. The last argument register r3 if it is available.
20983 2. A slot on the stack above the frame if there are no
20984 arguments to push onto the stack.
20985 3. Register r3 again, after pushing the argument registers
20986 onto the stack, if this is a varargs function.
20987 4. The last slot on the stack created for the arguments to
20988 push, if this isn't a varargs function.
20990 Note - we only need to tell the dwarf2 backend about the SP
20991 adjustment in the second variant; the static chain register
20992 doesn't need to be unwound, as it doesn't contain a value
20993 inherited from the caller. */
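/* As an illustrative sketch of the first and simplest case (assuming r3 is
   dead on entry), the code emitted here together with the matching
   frame-creation code further down amounts to

	mov	r3, ip		@ stash the static chain scratch register
	...			@ push registers, build the frame
	mov	ip, r3		@ recover it once the frame exists
   */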
20995 if (!arm_r3_live_at_start_p ())
20996 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20997 else if (args_to_push == 0)
20999 rtx addr, dwarf;
21001 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21002 saved_regs += 4;
21004 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21005 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21006 fp_offset = 4;
21008 /* Just tell the dwarf backend that we adjusted SP. */
21009 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21010 plus_constant (Pmode, stack_pointer_rtx,
21011 -fp_offset));
21012 RTX_FRAME_RELATED_P (insn) = 1;
21013 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21015 else
21017 /* Store the args on the stack. */
21018 if (cfun->machine->uses_anonymous_args)
21020 insn
21021 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21022 (0xf0 >> (args_to_push / 4)) & 0xf);
21023 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21024 saved_pretend_args = 1;
21026 else
21028 rtx addr, dwarf;
21030 if (args_to_push == 4)
21031 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21032 else
21033 addr
21034 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21035 plus_constant (Pmode,
21036 stack_pointer_rtx,
21037 -args_to_push));
21039 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21041 /* Just tell the dwarf backend that we adjusted SP. */
21042 dwarf
21043 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21044 plus_constant (Pmode, stack_pointer_rtx,
21045 -args_to_push));
21046 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21049 RTX_FRAME_RELATED_P (insn) = 1;
21050 fp_offset = args_to_push;
21051 args_to_push = 0;
21055 insn = emit_set_insn (ip_rtx,
21056 plus_constant (Pmode, stack_pointer_rtx,
21057 fp_offset));
21058 RTX_FRAME_RELATED_P (insn) = 1;
21061 if (args_to_push)
21063 /* Push the argument registers, or reserve space for them. */
21064 if (cfun->machine->uses_anonymous_args)
21065 insn = emit_multi_reg_push
21066 ((0xf0 >> (args_to_push / 4)) & 0xf,
21067 (0xf0 >> (args_to_push / 4)) & 0xf);
21068 else
21069 insn = emit_insn
21070 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21071 GEN_INT (- args_to_push)));
21072 RTX_FRAME_RELATED_P (insn) = 1;
21075 /* If this is an interrupt service routine, and the link register
21076 is going to be pushed, and we're not generating an extra
21077 push of IP (needed when a frame is needed and the frame layout is APCS),
21078 subtracting four from LR now will mean that the function return
21079 can be done with a single instruction. */
21080 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21081 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21082 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21083 && TARGET_ARM)
21085 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21087 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21090 if (live_regs_mask)
21092 unsigned long dwarf_regs_mask = live_regs_mask;
21094 saved_regs += bit_count (live_regs_mask) * 4;
21095 if (optimize_size && !frame_pointer_needed
21096 && saved_regs == offsets->saved_regs - offsets->saved_args)
21098 /* If no coprocessor registers are being pushed and we don't have
21099 to worry about a frame pointer then push extra registers to
21100 create the stack frame. This is done in a way that does not
21101 alter the frame layout, so is independent of the epilogue. */
21102 int n;
21103 int frame;
21104 n = 0;
21105 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21106 n++;
21107 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21108 if (frame && n * 4 >= frame)
21110 n = frame / 4;
21111 live_regs_mask |= (1 << n) - 1;
21112 saved_regs += frame;
21116 if (TARGET_LDRD
21117 && current_tune->prefer_ldrd_strd
21118 && !optimize_function_for_size_p (cfun))
21120 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21121 if (TARGET_THUMB2)
21122 thumb2_emit_strd_push (live_regs_mask);
21123 else if (TARGET_ARM
21124 && !TARGET_APCS_FRAME
21125 && !IS_INTERRUPT (func_type))
21126 arm_emit_strd_push (live_regs_mask);
21127 else
21129 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21130 RTX_FRAME_RELATED_P (insn) = 1;
21133 else
21135 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21136 RTX_FRAME_RELATED_P (insn) = 1;
21140 if (! IS_VOLATILE (func_type))
21141 saved_regs += arm_save_coproc_regs ();
21143 if (frame_pointer_needed && TARGET_ARM)
21145 /* Create the new frame pointer. */
21146 if (TARGET_APCS_FRAME)
21148 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21149 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21150 RTX_FRAME_RELATED_P (insn) = 1;
21152 if (IS_NESTED (func_type))
21154 /* Recover the static chain register. */
21155 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21156 insn = gen_rtx_REG (SImode, 3);
21157 else
21159 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21160 insn = gen_frame_mem (SImode, insn);
21162 emit_set_insn (ip_rtx, insn);
21163 /* Add a USE to stop propagate_one_insn() from barfing. */
21164 emit_insn (gen_force_register_use (ip_rtx));
21167 else
21169 insn = GEN_INT (saved_regs - 4);
21170 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21171 stack_pointer_rtx, insn));
21172 RTX_FRAME_RELATED_P (insn) = 1;
21176 if (flag_stack_usage_info)
21177 current_function_static_stack_size
21178 = offsets->outgoing_args - offsets->saved_args;
21180 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21182 /* This add can produce multiple insns for a large constant, so we
21183 need to get tricky. */
21184 rtx last = get_last_insn ();
21186 amount = GEN_INT (offsets->saved_args + saved_regs
21187 - offsets->outgoing_args);
21189 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21190 amount));
21193 last = last ? NEXT_INSN (last) : get_insns ();
21194 RTX_FRAME_RELATED_P (last) = 1;
21196 while (last != insn);
21198 /* If the frame pointer is needed, emit a special barrier that
21199 will prevent the scheduler from moving stores to the frame
21200 before the stack adjustment. */
21201 if (frame_pointer_needed)
21202 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21203 hard_frame_pointer_rtx));
21207 if (frame_pointer_needed && TARGET_THUMB2)
21208 thumb_set_frame_pointer (offsets);
21210 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21212 unsigned long mask;
21214 mask = live_regs_mask;
21215 mask &= THUMB2_WORK_REGS;
21216 if (!IS_NESTED (func_type))
21217 mask |= (1 << IP_REGNUM);
21218 arm_load_pic_register (mask);
21221 /* If we are profiling, make sure no instructions are scheduled before
21222 the call to mcount. Similarly if the user has requested no
21223 scheduling in the prolog. Similarly if we want non-call exceptions
21224 using the EABI unwinder, to prevent faulting instructions from being
21225 swapped with a stack adjustment. */
21226 if (crtl->profile || !TARGET_SCHED_PROLOG
21227 || (arm_except_unwind_info (&global_options) == UI_TARGET
21228 && cfun->can_throw_non_call_exceptions))
21229 emit_insn (gen_blockage ());
21231 /* If the link register is being kept alive, with the return address in it,
21232 then make sure that it does not get reused by the ce2 pass. */
21233 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21234 cfun->machine->lr_save_eliminated = 1;
21237 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21238 static void
21239 arm_print_condition (FILE *stream)
21241 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21243 /* Branch conversion is not implemented for Thumb-2. */
21244 if (TARGET_THUMB)
21246 output_operand_lossage ("predicated Thumb instruction");
21247 return;
21249 if (current_insn_predicate != NULL)
21251 output_operand_lossage
21252 ("predicated instruction in conditional sequence");
21253 return;
21256 fputs (arm_condition_codes[arm_current_cc], stream);
21258 else if (current_insn_predicate)
21260 enum arm_cond_code code;
21262 if (TARGET_THUMB1)
21264 output_operand_lossage ("predicated Thumb instruction");
21265 return;
21268 code = get_arm_condition_code (current_insn_predicate);
21269 fputs (arm_condition_codes[code], stream);
21274 /* If CODE is 'd', then X is a condition operand and the instruction
21275 should only be executed if the condition is true.
21276 If CODE is 'D', then X is a condition operand and the instruction
21277 should only be executed if the condition is false: however, if the mode
21278 of the comparison is CCFPEmode, then always execute the instruction -- we
21279 do this because in these circumstances !GE does not necessarily imply LT;
21280 in these cases the instruction pattern will take care to make sure that
21281 an instruction containing %d will follow, thereby undoing the effects of
21282 doing this instruction unconditionally.
21283 If CODE is 'N' then X is a floating point operand that must be negated
21284 before output.
21285 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21286 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21287 static void
21288 arm_print_operand (FILE *stream, rtx x, int code)
21290 switch (code)
21292 case '@':
21293 fputs (ASM_COMMENT_START, stream);
21294 return;
21296 case '_':
21297 fputs (user_label_prefix, stream);
21298 return;
21300 case '|':
21301 fputs (REGISTER_PREFIX, stream);
21302 return;
21304 case '?':
21305 arm_print_condition (stream);
21306 return;
21308 case '(':
21309 /* Nothing in unified syntax, otherwise the current condition code. */
21310 if (!TARGET_UNIFIED_ASM)
21311 arm_print_condition (stream);
21312 break;
21314 case ')':
21315 /* The current condition code in unified syntax, otherwise nothing. */
21316 if (TARGET_UNIFIED_ASM)
21317 arm_print_condition (stream);
21318 break;
21320 case '.':
21321 /* The current condition code for a condition code setting instruction.
21322 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21323 if (TARGET_UNIFIED_ASM)
21325 fputc('s', stream);
21326 arm_print_condition (stream);
21328 else
21330 arm_print_condition (stream);
21331 fputc('s', stream);
21333 return;
21335 case '!':
21336 /* If the instruction is conditionally executed then print
21337 the current condition code, otherwise print 's'. */
21338 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21339 if (current_insn_predicate)
21340 arm_print_condition (stream);
21341 else
21342 fputc('s', stream);
21343 break;
21345 /* %# is a "break" sequence. It doesn't output anything, but is used to
21346 separate e.g. operand numbers from following text, if that text consists
21347 of further digits which we don't want to be part of the operand
21348 number. */
21349 case '#':
21350 return;
21352 case 'N':
21354 REAL_VALUE_TYPE r;
21355 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21356 r = real_value_negate (&r);
21357 fprintf (stream, "%s", fp_const_from_val (&r));
21359 return;
21361 /* An integer or symbol address without a preceding # sign. */
21362 case 'c':
21363 switch (GET_CODE (x))
21365 case CONST_INT:
21366 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21367 break;
21369 case SYMBOL_REF:
21370 output_addr_const (stream, x);
21371 break;
21373 case CONST:
21374 if (GET_CODE (XEXP (x, 0)) == PLUS
21375 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21377 output_addr_const (stream, x);
21378 break;
21380 /* Fall through. */
21382 default:
21383 output_operand_lossage ("Unsupported operand for code '%c'", code);
21385 return;
21387 /* An integer that we want to print in HEX. */
21388 case 'x':
21389 switch (GET_CODE (x))
21391 case CONST_INT:
21392 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21393 break;
21395 default:
21396 output_operand_lossage ("Unsupported operand for code '%c'", code);
21398 return;
21400 case 'B':
21401 if (CONST_INT_P (x))
21403 HOST_WIDE_INT val;
21404 val = ARM_SIGN_EXTEND (~INTVAL (x));
21405 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21407 else
21409 putc ('~', stream);
21410 output_addr_const (stream, x);
21412 return;
21414 case 'L':
21415 /* The low 16 bits of an immediate constant. */
21416 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21417 return;
21419 case 'i':
21420 fprintf (stream, "%s", arithmetic_instr (x, 1));
21421 return;
21423 case 'I':
21424 fprintf (stream, "%s", arithmetic_instr (x, 0));
21425 return;
21427 case 'S':
21429 HOST_WIDE_INT val;
21430 const char *shift;
21432 shift = shift_op (x, &val);
21434 if (shift)
21436 fprintf (stream, ", %s ", shift);
21437 if (val == -1)
21438 arm_print_operand (stream, XEXP (x, 1), 0);
21439 else
21440 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21443 return;
21445 /* An explanation of the 'Q', 'R' and 'H' register operands:
21447 In a pair of registers containing a DI or DF value the 'Q'
21448 operand returns the register number of the register containing
21449 the least significant part of the value. The 'R' operand returns
21450 the register number of the register containing the most
21451 significant part of the value.
21453 The 'H' operand returns the higher of the two register numbers.
21454 On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
21455 same as the 'Q' operand, since the most significant part of the
21456 value is held in the lower number register. The reverse is true
21457 on systems where WORDS_BIG_ENDIAN is false.
21459 The purpose of these operands is to distinguish between cases
21460 where the endian-ness of the values is important (for example
21461 when they are added together), and cases where the endian-ness
21462 is irrelevant, but the order of register operations is important.
21463 For example when loading a value from memory into a register
21464 pair, the endian-ness does not matter. Provided that the value
21465 from the lower memory address is put into the lower numbered
21466 register, and the value from the higher address is put into the
21467 higher numbered register, the load will work regardless of whether
21468 the value being loaded is big-wordian or little-wordian. The
21469 order of the two register loads can matter however, if the address
21470 of the memory location is actually held in one of the registers
21471 being overwritten by the load.
21473 The 'Q' and 'R' constraints are also available for 64-bit
21474 constants. */
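/* As a sketch of the behaviour described above, assuming a little-endian
   target (WORDS_BIG_ENDIAN false) and a DImode value held in the pair r0/r1:
   %Q prints r0 (the least significant word), %R prints r1 (the most
   significant word) and %H also prints r1, the higher register number.  */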
21475 case 'Q':
21476 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21478 rtx part = gen_lowpart (SImode, x);
21479 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21480 return;
21483 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21485 output_operand_lossage ("invalid operand for code '%c'", code);
21486 return;
21489 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21490 return;
21492 case 'R':
21493 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21495 enum machine_mode mode = GET_MODE (x);
21496 rtx part;
21498 if (mode == VOIDmode)
21499 mode = DImode;
21500 part = gen_highpart_mode (SImode, mode, x);
21501 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21502 return;
21505 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21507 output_operand_lossage ("invalid operand for code '%c'", code);
21508 return;
21511 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21512 return;
21514 case 'H':
21515 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21517 output_operand_lossage ("invalid operand for code '%c'", code);
21518 return;
21521 asm_fprintf (stream, "%r", REGNO (x) + 1);
21522 return;
21524 case 'J':
21525 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21527 output_operand_lossage ("invalid operand for code '%c'", code);
21528 return;
21531 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21532 return;
21534 case 'K':
21535 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21537 output_operand_lossage ("invalid operand for code '%c'", code);
21538 return;
21541 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21542 return;
21544 case 'm':
21545 asm_fprintf (stream, "%r",
21546 REG_P (XEXP (x, 0))
21547 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21548 return;
21550 case 'M':
21551 asm_fprintf (stream, "{%r-%r}",
21552 REGNO (x),
21553 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21554 return;
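/* For example (editorial illustration): a DImode operand held in r4 spans
   two registers, so '%M' prints "{r4-r5}".  */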
21556 /* Like 'M', but writing doubleword vector registers, for use by Neon
21557 insns. */
21558 case 'h':
21560 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21561 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21562 if (numregs == 1)
21563 asm_fprintf (stream, "{d%d}", regno);
21564 else
21565 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21567 return;
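/* Illustration (not original text): an OImode operand (32 bytes, eight
   words) starting at d0 prints as "{d0-d3}", while a 16-byte operand in
   the same position prints as "{d0-d1}".  */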
21569 case 'd':
21570 /* CONST_TRUE_RTX means always -- that's the default. */
21571 if (x == const_true_rtx)
21572 return;
21574 if (!COMPARISON_P (x))
21576 output_operand_lossage ("invalid operand for code '%c'", code);
21577 return;
21580 fputs (arm_condition_codes[get_arm_condition_code (x)],
21581 stream);
21582 return;
21584 case 'D':
21585 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21586 want to do that. */
21587 if (x == const_true_rtx)
21589 output_operand_lossage ("instruction never executed");
21590 return;
21592 if (!COMPARISON_P (x))
21594 output_operand_lossage ("invalid operand for code '%c'", code);
21595 return;
21598 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21599 (get_arm_condition_code (x))],
21600 stream);
21601 return;
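/* Illustration: for a comparison such as (ge (reg:CC CC_REGNUM)
   (const_int 0)), '%d' prints "ge" and '%D' prints the inverse
   condition, "lt".  */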
21603 case 's':
21604 case 'V':
21605 case 'W':
21606 case 'X':
21607 case 'Y':
21608 case 'Z':
21609 /* Former Maverick support, removed after GCC-4.7. */
21610 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21611 return;
21613 case 'U':
21614 if (!REG_P (x)
21615 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21616 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21617 /* Bad value for wCG register number. */
21619 output_operand_lossage ("invalid operand for code '%c'", code);
21620 return;
21623 else
21624 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21625 return;
21627 /* Print an iWMMXt control register name. */
21628 case 'w':
21629 if (!CONST_INT_P (x)
21630 || INTVAL (x) < 0
21631 || INTVAL (x) >= 16)
21632 /* Bad value for wC register number. */
21634 output_operand_lossage ("invalid operand for code '%c'", code);
21635 return;
21638 else
21640 static const char * wc_reg_names [16] =
21642 "wCID", "wCon", "wCSSF", "wCASF",
21643 "wC4", "wC5", "wC6", "wC7",
21644 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21645 "wC12", "wC13", "wC14", "wC15"
21648 fputs (wc_reg_names [INTVAL (x)], stream);
21650 return;
21652 /* Print the high single-precision register of a VFP double-precision
21653 register. */
21654 case 'p':
21656 enum machine_mode mode = GET_MODE (x);
21657 int regno;
21659 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21661 output_operand_lossage ("invalid operand for code '%c'", code);
21662 return;
21665 regno = REGNO (x);
21666 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21668 output_operand_lossage ("invalid operand for code '%c'", code);
21669 return;
21672 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21674 return;
21676 /* Print a VFP/Neon double precision or quad precision register name. */
21677 case 'P':
21678 case 'q':
21680 enum machine_mode mode = GET_MODE (x);
21681 int is_quad = (code == 'q');
21682 int regno;
21684 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21686 output_operand_lossage ("invalid operand for code '%c'", code);
21687 return;
21690 if (!REG_P (x)
21691 || !IS_VFP_REGNUM (REGNO (x)))
21693 output_operand_lossage ("invalid operand for code '%c'", code);
21694 return;
21697 regno = REGNO (x);
21698 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21699 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21701 output_operand_lossage ("invalid operand for code '%c'", code);
21702 return;
21705 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21706 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21708 return;
21710 /* These two codes print the low/high doubleword register of a Neon quad
21711 register, respectively. For pair-structure types, can also print
21712 low/high quadword registers. */
21713 case 'e':
21714 case 'f':
21716 enum machine_mode mode = GET_MODE (x);
21717 int regno;
21719 if ((GET_MODE_SIZE (mode) != 16
21720 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21722 output_operand_lossage ("invalid operand for code '%c'", code);
21723 return;
21726 regno = REGNO (x);
21727 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21729 output_operand_lossage ("invalid operand for code '%c'", code);
21730 return;
21733 if (GET_MODE_SIZE (mode) == 16)
21734 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21735 + (code == 'f' ? 1 : 0));
21736 else
21737 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21738 + (code == 'f' ? 1 : 0));
21740 return;
21742 /* Print a VFPv3 floating-point constant, represented as an integer
21743 index. */
21744 case 'G':
21746 int index = vfp3_const_double_index (x);
21747 gcc_assert (index != -1);
21748 fprintf (stream, "%d", index);
21750 return;
21752 /* Print bits representing opcode features for Neon.
21754 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21755 and polynomials as unsigned.
21757 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21759 Bit 2 is 1 for rounding functions, 0 otherwise. */
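/* Worked example of this encoding (editorial illustration): a bits value
   of 5 (binary 101) has bit 0 set (signed) and bit 2 set (rounding), so
   '%T' prints 's' and '%O' prints 'r'; a value of 3 selects 'f' for both
   '%T' and '%F'.  */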
21761 /* Identify the type as 's', 'u', 'p' or 'f'. */
21762 case 'T':
21764 HOST_WIDE_INT bits = INTVAL (x);
21765 fputc ("uspf"[bits & 3], stream);
21767 return;
21769 /* Likewise, but signed and unsigned integers are both 'i'. */
21770 case 'F':
21772 HOST_WIDE_INT bits = INTVAL (x);
21773 fputc ("iipf"[bits & 3], stream);
21775 return;
21777 /* As for 'T', but emit 'u' instead of 'p'. */
21778 case 't':
21780 HOST_WIDE_INT bits = INTVAL (x);
21781 fputc ("usuf"[bits & 3], stream);
21783 return;
21785 /* Bit 2: rounding (vs none). */
21786 case 'O':
21788 HOST_WIDE_INT bits = INTVAL (x);
21789 fputs ((bits & 4) != 0 ? "r" : "", stream);
21791 return;
21793 /* Memory operand for vld1/vst1 instruction. */
21794 case 'A':
21796 rtx addr;
21797 bool postinc = FALSE;
21798 unsigned align, memsize, align_bits;
21800 gcc_assert (MEM_P (x));
21801 addr = XEXP (x, 0);
21802 if (GET_CODE (addr) == POST_INC)
21804 postinc = 1;
21805 addr = XEXP (addr, 0);
21807 asm_fprintf (stream, "[%r", REGNO (addr));
21809 /* We know the alignment of this access, so we can emit a hint in the
21810 instruction (for some alignments) as an aid to the memory subsystem
21811 of the target. */
21812 align = MEM_ALIGN (x) >> 3;
21813 memsize = MEM_SIZE (x);
21815 /* Only certain alignment specifiers are supported by the hardware. */
21816 if (memsize == 32 && (align % 32) == 0)
21817 align_bits = 256;
21818 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21819 align_bits = 128;
21820 else if (memsize >= 8 && (align % 8) == 0)
21821 align_bits = 64;
21822 else
21823 align_bits = 0;
21825 if (align_bits != 0)
21826 asm_fprintf (stream, ":%d", align_bits);
21828 asm_fprintf (stream, "]");
21830 if (postinc)
21831 fputs("!", stream);
21833 return;
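/* Example of the hint emitted above (illustrative): a 16-byte vld1/vst1
   access whose MEM_ALIGN is 128 bits prints as "[r0:128]", or as
   "[r0:128]!" with post-increment.  */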
21835 case 'C':
21837 rtx addr;
21839 gcc_assert (MEM_P (x));
21840 addr = XEXP (x, 0);
21841 gcc_assert (REG_P (addr));
21842 asm_fprintf (stream, "[%r]", REGNO (addr));
21844 return;
21846 /* Translate an S register number into a D register number and element index. */
21847 case 'y':
21849 enum machine_mode mode = GET_MODE (x);
21850 int regno;
21852 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21854 output_operand_lossage ("invalid operand for code '%c'", code);
21855 return;
21858 regno = REGNO (x);
21859 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21861 output_operand_lossage ("invalid operand for code '%c'", code);
21862 return;
21865 regno = regno - FIRST_VFP_REGNUM;
21866 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21868 return;
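/* Illustration (not original text): the single-precision register s5 is
   the odd half of d2, so '%y' prints "d2[1]"; s4 would give "d2[0]".  */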
21870 case 'v':
21871 gcc_assert (CONST_DOUBLE_P (x));
21872 int result;
21873 result = vfp3_const_double_for_fract_bits (x);
21874 if (result == 0)
21875 result = vfp3_const_double_for_bits (x);
21876 fprintf (stream, "#%d", result);
21877 return;
21879 /* Register specifier for vld1.16/vst1.16. Translate the S register
21880 number into a D register number and element index. */
21881 case 'z':
21883 enum machine_mode mode = GET_MODE (x);
21884 int regno;
21886 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21888 output_operand_lossage ("invalid operand for code '%c'", code);
21889 return;
21892 regno = REGNO (x);
21893 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21895 output_operand_lossage ("invalid operand for code '%c'", code);
21896 return;
21899 regno = regno - FIRST_VFP_REGNUM;
21900 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21902 return;
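/* Illustration: under '%z' a 16-bit value in s5 prints as "d2[2]"; s5 is
   the high word of d2 and the value occupies its low half, which is
   16-bit lane 2 of d2 (s4 would give "d2[0]").  */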
21904 default:
21905 if (x == 0)
21907 output_operand_lossage ("missing operand");
21908 return;
21911 switch (GET_CODE (x))
21913 case REG:
21914 asm_fprintf (stream, "%r", REGNO (x));
21915 break;
21917 case MEM:
21918 output_memory_reference_mode = GET_MODE (x);
21919 output_address (XEXP (x, 0));
21920 break;
21922 case CONST_DOUBLE:
21923 if (TARGET_NEON)
21925 char fpstr[20];
21926 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21927 sizeof (fpstr), 0, 1);
21928 fprintf (stream, "#%s", fpstr);
21930 else
21931 fprintf (stream, "#%s", fp_immediate_constant (x));
21932 break;
21934 default:
21935 gcc_assert (GET_CODE (x) != NEG);
21936 fputc ('#', stream);
21937 if (GET_CODE (x) == HIGH)
21939 fputs (":lower16:", stream);
21940 x = XEXP (x, 0);
21943 output_addr_const (stream, x);
21944 break;
21949 /* Target hook for printing a memory address. */
21950 static void
21951 arm_print_operand_address (FILE *stream, rtx x)
21953 if (TARGET_32BIT)
21955 int is_minus = GET_CODE (x) == MINUS;
21957 if (REG_P (x))
21958 asm_fprintf (stream, "[%r]", REGNO (x));
21959 else if (GET_CODE (x) == PLUS || is_minus)
21961 rtx base = XEXP (x, 0);
21962 rtx index = XEXP (x, 1);
21963 HOST_WIDE_INT offset = 0;
21964 if (!REG_P (base)
21965 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21967 /* Ensure that BASE is a register. */
21968 /* (one of them must be). */
21969 /* Also ensure the SP is not used as an index register. */
21970 rtx temp = base;
21971 base = index;
21972 index = temp;
21974 switch (GET_CODE (index))
21976 case CONST_INT:
21977 offset = INTVAL (index);
21978 if (is_minus)
21979 offset = -offset;
21980 asm_fprintf (stream, "[%r, #%wd]",
21981 REGNO (base), offset);
21982 break;
21984 case REG:
21985 asm_fprintf (stream, "[%r, %s%r]",
21986 REGNO (base), is_minus ? "-" : "",
21987 REGNO (index));
21988 break;
21990 case MULT:
21991 case ASHIFTRT:
21992 case LSHIFTRT:
21993 case ASHIFT:
21994 case ROTATERT:
21996 asm_fprintf (stream, "[%r, %s%r",
21997 REGNO (base), is_minus ? "-" : "",
21998 REGNO (XEXP (index, 0)));
21999 arm_print_operand (stream, index, 'S');
22000 fputs ("]", stream);
22001 break;
22004 default:
22005 gcc_unreachable ();
22008 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22009 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22011 extern enum machine_mode output_memory_reference_mode;
22013 gcc_assert (REG_P (XEXP (x, 0)));
22015 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22016 asm_fprintf (stream, "[%r, #%s%d]!",
22017 REGNO (XEXP (x, 0)),
22018 GET_CODE (x) == PRE_DEC ? "-" : "",
22019 GET_MODE_SIZE (output_memory_reference_mode));
22020 else
22021 asm_fprintf (stream, "[%r], #%s%d",
22022 REGNO (XEXP (x, 0)),
22023 GET_CODE (x) == POST_DEC ? "-" : "",
22024 GET_MODE_SIZE (output_memory_reference_mode));
22026 else if (GET_CODE (x) == PRE_MODIFY)
22028 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22029 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22030 asm_fprintf (stream, "#%wd]!",
22031 INTVAL (XEXP (XEXP (x, 1), 1)));
22032 else
22033 asm_fprintf (stream, "%r]!",
22034 REGNO (XEXP (XEXP (x, 1), 1)));
22036 else if (GET_CODE (x) == POST_MODIFY)
22038 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22039 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22040 asm_fprintf (stream, "#%wd",
22041 INTVAL (XEXP (XEXP (x, 1), 1)));
22042 else
22043 asm_fprintf (stream, "%r",
22044 REGNO (XEXP (XEXP (x, 1), 1)));
22046 else output_addr_const (stream, x);
22048 else
22050 if (REG_P (x))
22051 asm_fprintf (stream, "[%r]", REGNO (x));
22052 else if (GET_CODE (x) == POST_INC)
22053 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22054 else if (GET_CODE (x) == PLUS)
22056 gcc_assert (REG_P (XEXP (x, 0)));
22057 if (CONST_INT_P (XEXP (x, 1)))
22058 asm_fprintf (stream, "[%r, #%wd]",
22059 REGNO (XEXP (x, 0)),
22060 INTVAL (XEXP (x, 1)));
22061 else
22062 asm_fprintf (stream, "[%r, %r]",
22063 REGNO (XEXP (x, 0)),
22064 REGNO (XEXP (x, 1)));
22066 else
22067 output_addr_const (stream, x);
22071 /* Target hook for indicating whether a punctuation character for
22072 TARGET_PRINT_OPERAND is valid. */
22073 static bool
22074 arm_print_operand_punct_valid_p (unsigned char code)
22076 return (code == '@' || code == '|' || code == '.'
22077 || code == '(' || code == ')' || code == '#'
22078 || (TARGET_32BIT && (code == '?'))
22079 || (TARGET_THUMB2 && (code == '!'))
22080 || (TARGET_THUMB && (code == '_')));
22083 /* Target hook for assembling integer objects. The ARM version needs to
22084 handle word-sized values specially. */
22085 static bool
22086 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22088 enum machine_mode mode;
22090 if (size == UNITS_PER_WORD && aligned_p)
22092 fputs ("\t.word\t", asm_out_file);
22093 output_addr_const (asm_out_file, x);
22095 /* Mark symbols as position independent. We only do this in the
22096 .text segment, not in the .data segment. */
22097 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22098 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22100 /* See legitimize_pic_address for an explanation of the
22101 TARGET_VXWORKS_RTP check. */
22102 if (!arm_pic_data_is_text_relative
22103 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22104 fputs ("(GOT)", asm_out_file);
22105 else
22106 fputs ("(GOTOFF)", asm_out_file);
22108 fputc ('\n', asm_out_file);
22109 return true;
22112 mode = GET_MODE (x);
22114 if (arm_vector_mode_supported_p (mode))
22116 int i, units;
22118 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22120 units = CONST_VECTOR_NUNITS (x);
22121 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22123 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22124 for (i = 0; i < units; i++)
22126 rtx elt = CONST_VECTOR_ELT (x, i);
22127 assemble_integer
22128 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22130 else
22131 for (i = 0; i < units; i++)
22133 rtx elt = CONST_VECTOR_ELT (x, i);
22134 REAL_VALUE_TYPE rval;
22136 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22138 assemble_real
22139 (rval, GET_MODE_INNER (mode),
22140 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22143 return true;
22146 return default_assemble_integer (x, size, aligned_p);
22149 static void
22150 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22152 section *s;
22154 if (!TARGET_AAPCS_BASED)
22156 (is_ctor ?
22157 default_named_section_asm_out_constructor
22158 : default_named_section_asm_out_destructor) (symbol, priority);
22159 return;
22162 /* Put these in the .init_array section, using a special relocation. */
22163 if (priority != DEFAULT_INIT_PRIORITY)
22165 char buf[18];
22166 sprintf (buf, "%s.%.5u",
22167 is_ctor ? ".init_array" : ".fini_array",
22168 priority);
22169 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22171 else if (is_ctor)
22172 s = ctors_section;
22173 else
22174 s = dtors_section;
22176 switch_to_section (s);
22177 assemble_align (POINTER_SIZE);
22178 fputs ("\t.word\t", asm_out_file);
22179 output_addr_const (asm_out_file, symbol);
22180 fputs ("(target1)\n", asm_out_file);
22183 /* Add a function to the list of static constructors. */
22185 static void
22186 arm_elf_asm_constructor (rtx symbol, int priority)
22188 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22191 /* Add a function to the list of static destructors. */
22193 static void
22194 arm_elf_asm_destructor (rtx symbol, int priority)
22196 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22199 /* A finite state machine takes care of noticing whether or not instructions
22200 can be conditionally executed, and thus decreases execution time and code
22201 size by deleting branch instructions. The fsm is controlled by
22202 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22204 /* The states of the fsm controlling condition codes are:
22205 0: normal, do nothing special
22206 1: make ASM_OUTPUT_OPCODE not output this instruction
22207 2: make ASM_OUTPUT_OPCODE not output this instruction
22208 3: make instructions conditional
22209 4: make instructions conditional
22211 State transitions (state->state by whom under condition):
22212 0 -> 1 final_prescan_insn if the `target' is a label
22213 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22214 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22215 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22216 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22217 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22218 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22219 (the target insn is arm_target_insn).
22221 If the jump clobbers the conditions then we use states 2 and 4.
22223 A similar thing can be done with conditional return insns.
22225 XXX In case the `target' is an unconditional branch, this conditionalising
22226 of the instructions always reduces code size, but not always execution
22227 time. But then, I want to reduce the code size to somewhere near what
22228 /bin/cc produces. */
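/* A sketch of the transformation this machinery performs (illustrative
   code, not taken from any test case):

       cmp     r0, #0              cmp     r0, #0
       beq     .L1          ==>    addne   r1, r1, #1
       add     r1, r1, #1
     .L1:

   The conditional branch is deleted and the instruction it skipped is
   predicated on the inverse condition.  */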
22230 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22231 instructions. When a COND_EXEC instruction is seen the subsequent
22232 instructions are scanned so that multiple conditional instructions can be
22233 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22234 specify the length and true/false mask for the IT block. These will be
22235 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
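/* For instance (illustrative only): three consecutive COND_EXEC insns
   predicated eq, ne, eq give arm_condexec_mask the bit pattern 1,0,1,
   and thumb2_asm_output_opcode then emits "itet eq" before the first
   of them.  */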
22237 /* Returns the index of the ARM condition code string in
22238 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22239 COMPARISON should be an rtx like `(eq (...) (...))'. */
22241 enum arm_cond_code
22242 maybe_get_arm_condition_code (rtx comparison)
22244 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
22245 enum arm_cond_code code;
22246 enum rtx_code comp_code = GET_CODE (comparison);
22248 if (GET_MODE_CLASS (mode) != MODE_CC)
22249 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22250 XEXP (comparison, 1));
22252 switch (mode)
22254 case CC_DNEmode: code = ARM_NE; goto dominance;
22255 case CC_DEQmode: code = ARM_EQ; goto dominance;
22256 case CC_DGEmode: code = ARM_GE; goto dominance;
22257 case CC_DGTmode: code = ARM_GT; goto dominance;
22258 case CC_DLEmode: code = ARM_LE; goto dominance;
22259 case CC_DLTmode: code = ARM_LT; goto dominance;
22260 case CC_DGEUmode: code = ARM_CS; goto dominance;
22261 case CC_DGTUmode: code = ARM_HI; goto dominance;
22262 case CC_DLEUmode: code = ARM_LS; goto dominance;
22263 case CC_DLTUmode: code = ARM_CC;
22265 dominance:
22266 if (comp_code == EQ)
22267 return ARM_INVERSE_CONDITION_CODE (code);
22268 if (comp_code == NE)
22269 return code;
22270 return ARM_NV;
22272 case CC_NOOVmode:
22273 switch (comp_code)
22275 case NE: return ARM_NE;
22276 case EQ: return ARM_EQ;
22277 case GE: return ARM_PL;
22278 case LT: return ARM_MI;
22279 default: return ARM_NV;
22282 case CC_Zmode:
22283 switch (comp_code)
22285 case NE: return ARM_NE;
22286 case EQ: return ARM_EQ;
22287 default: return ARM_NV;
22290 case CC_Nmode:
22291 switch (comp_code)
22293 case NE: return ARM_MI;
22294 case EQ: return ARM_PL;
22295 default: return ARM_NV;
22298 case CCFPEmode:
22299 case CCFPmode:
22300 /* We can handle all cases except UNEQ and LTGT. */
22301 switch (comp_code)
22303 case GE: return ARM_GE;
22304 case GT: return ARM_GT;
22305 case LE: return ARM_LS;
22306 case LT: return ARM_MI;
22307 case NE: return ARM_NE;
22308 case EQ: return ARM_EQ;
22309 case ORDERED: return ARM_VC;
22310 case UNORDERED: return ARM_VS;
22311 case UNLT: return ARM_LT;
22312 case UNLE: return ARM_LE;
22313 case UNGT: return ARM_HI;
22314 case UNGE: return ARM_PL;
22315 /* UNEQ and LTGT do not have a representation. */
22316 case UNEQ: /* Fall through. */
22317 case LTGT: /* Fall through. */
22318 default: return ARM_NV;
22321 case CC_SWPmode:
22322 switch (comp_code)
22324 case NE: return ARM_NE;
22325 case EQ: return ARM_EQ;
22326 case GE: return ARM_LE;
22327 case GT: return ARM_LT;
22328 case LE: return ARM_GE;
22329 case LT: return ARM_GT;
22330 case GEU: return ARM_LS;
22331 case GTU: return ARM_CC;
22332 case LEU: return ARM_CS;
22333 case LTU: return ARM_HI;
22334 default: return ARM_NV;
22337 case CC_Cmode:
22338 switch (comp_code)
22340 case LTU: return ARM_CS;
22341 case GEU: return ARM_CC;
22342 default: return ARM_NV;
22345 case CC_CZmode:
22346 switch (comp_code)
22348 case NE: return ARM_NE;
22349 case EQ: return ARM_EQ;
22350 case GEU: return ARM_CS;
22351 case GTU: return ARM_HI;
22352 case LEU: return ARM_LS;
22353 case LTU: return ARM_CC;
22354 default: return ARM_NV;
22357 case CC_NCVmode:
22358 switch (comp_code)
22360 case GE: return ARM_GE;
22361 case LT: return ARM_LT;
22362 case GEU: return ARM_CS;
22363 case LTU: return ARM_CC;
22364 default: return ARM_NV;
22367 case CCmode:
22368 switch (comp_code)
22370 case NE: return ARM_NE;
22371 case EQ: return ARM_EQ;
22372 case GE: return ARM_GE;
22373 case GT: return ARM_GT;
22374 case LE: return ARM_LE;
22375 case LT: return ARM_LT;
22376 case GEU: return ARM_CS;
22377 case GTU: return ARM_HI;
22378 case LEU: return ARM_LS;
22379 case LTU: return ARM_CC;
22380 default: return ARM_NV;
22383 default: gcc_unreachable ();
22387 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22388 static enum arm_cond_code
22389 get_arm_condition_code (rtx comparison)
22391 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22392 gcc_assert (code != ARM_NV);
22393 return code;
22396 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22397 instructions. */
22398 void
22399 thumb2_final_prescan_insn (rtx insn)
22401 rtx first_insn = insn;
22402 rtx body = PATTERN (insn);
22403 rtx predicate;
22404 enum arm_cond_code code;
22405 int n;
22406 int mask;
22407 int max;
22409 /* max_insns_skipped in the tuning tables was already taken into account in the
22410 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
22411 just emit the IT blocks as best we can. It does not make sense to split
22412 the IT blocks. */
22413 max = MAX_INSN_PER_IT_BLOCK;
22415 /* Remove the previous insn from the count of insns to be output. */
22416 if (arm_condexec_count)
22417 arm_condexec_count--;
22419 /* Nothing to do if we are already inside a conditional block. */
22420 if (arm_condexec_count)
22421 return;
22423 if (GET_CODE (body) != COND_EXEC)
22424 return;
22426 /* Conditional jumps are implemented directly. */
22427 if (JUMP_P (insn))
22428 return;
22430 predicate = COND_EXEC_TEST (body);
22431 arm_current_cc = get_arm_condition_code (predicate);
22433 n = get_attr_ce_count (insn);
22434 arm_condexec_count = 1;
22435 arm_condexec_mask = (1 << n) - 1;
22436 arm_condexec_masklen = n;
22437 /* See if subsequent instructions can be combined into the same block. */
22438 for (;;)
22440 insn = next_nonnote_insn (insn);
22442 /* Jumping into the middle of an IT block is illegal, so a label or
22443 barrier terminates the block. */
22444 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22445 break;
22447 body = PATTERN (insn);
22448 /* USE and CLOBBER aren't really insns, so just skip them. */
22449 if (GET_CODE (body) == USE
22450 || GET_CODE (body) == CLOBBER)
22451 continue;
22453 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22454 if (GET_CODE (body) != COND_EXEC)
22455 break;
22456 /* Maximum number of conditionally executed instructions in a block. */
22457 n = get_attr_ce_count (insn);
22458 if (arm_condexec_masklen + n > max)
22459 break;
22461 predicate = COND_EXEC_TEST (body);
22462 code = get_arm_condition_code (predicate);
22463 mask = (1 << n) - 1;
22464 if (arm_current_cc == code)
22465 arm_condexec_mask |= (mask << arm_condexec_masklen);
22466 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22467 break;
22469 arm_condexec_count++;
22470 arm_condexec_masklen += n;
22472 /* A jump must be the last instruction in a conditional block. */
22473 if (JUMP_P (insn))
22474 break;
22476 /* Restore recog_data (getting the attributes of other insns can
22477 destroy this array, but final.c assumes that it remains intact
22478 across this call). */
22479 extract_constrain_insn_cached (first_insn);
22482 void
22483 arm_final_prescan_insn (rtx insn)
22485 /* BODY will hold the body of INSN. */
22486 rtx body = PATTERN (insn);
22488 /* This will be 1 if trying to repeat the trick, and things need to be
22489 reversed if it appears to fail. */
22490 int reverse = 0;
22492 /* If we start with a return insn, we only succeed if we find another one. */
22493 int seeking_return = 0;
22494 enum rtx_code return_code = UNKNOWN;
22496 /* START_INSN will hold the insn from where we start looking. This is the
22497 first insn after the following code_label if REVERSE is true. */
22498 rtx start_insn = insn;
22500 /* If in state 4, check if the target branch is reached, in order to
22501 change back to state 0. */
22502 if (arm_ccfsm_state == 4)
22504 if (insn == arm_target_insn)
22506 arm_target_insn = NULL;
22507 arm_ccfsm_state = 0;
22509 return;
22512 /* If in state 3, it is possible to repeat the trick, if this insn is an
22513 unconditional branch to a label, and immediately following this branch
22514 is the previous target label which is only used once, and the label this
22515 branch jumps to is not too far off. */
22516 if (arm_ccfsm_state == 3)
22518 if (simplejump_p (insn))
22520 start_insn = next_nonnote_insn (start_insn);
22521 if (BARRIER_P (start_insn))
22523 /* XXX Isn't this always a barrier? */
22524 start_insn = next_nonnote_insn (start_insn);
22526 if (LABEL_P (start_insn)
22527 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22528 && LABEL_NUSES (start_insn) == 1)
22529 reverse = TRUE;
22530 else
22531 return;
22533 else if (ANY_RETURN_P (body))
22535 start_insn = next_nonnote_insn (start_insn);
22536 if (BARRIER_P (start_insn))
22537 start_insn = next_nonnote_insn (start_insn);
22538 if (LABEL_P (start_insn)
22539 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22540 && LABEL_NUSES (start_insn) == 1)
22542 reverse = TRUE;
22543 seeking_return = 1;
22544 return_code = GET_CODE (body);
22546 else
22547 return;
22549 else
22550 return;
22553 gcc_assert (!arm_ccfsm_state || reverse);
22554 if (!JUMP_P (insn))
22555 return;
22557 /* This jump might be paralleled with a clobber of the condition codes;
22558 the jump should always come first. */
22559 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22560 body = XVECEXP (body, 0, 0);
22562 if (reverse
22563 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22564 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22566 int insns_skipped;
22567 int fail = FALSE, succeed = FALSE;
22568 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22569 int then_not_else = TRUE;
22570 rtx this_insn = start_insn, label = 0;
22572 /* Register the insn jumped to. */
22573 if (reverse)
22575 if (!seeking_return)
22576 label = XEXP (SET_SRC (body), 0);
22578 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22579 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22580 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22582 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22583 then_not_else = FALSE;
22585 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22587 seeking_return = 1;
22588 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22590 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22592 seeking_return = 1;
22593 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22594 then_not_else = FALSE;
22596 else
22597 gcc_unreachable ();
22599 /* See how many insns this branch skips, and what kind of insns. If all
22600 insns are okay, and the label or unconditional branch to the same
22601 label is not too far away, succeed. */
22602 for (insns_skipped = 0;
22603 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22605 rtx scanbody;
22607 this_insn = next_nonnote_insn (this_insn);
22608 if (!this_insn)
22609 break;
22611 switch (GET_CODE (this_insn))
22613 case CODE_LABEL:
22614 /* Succeed if it is the target label, otherwise fail since
22615 control falls in from somewhere else. */
22616 if (this_insn == label)
22618 arm_ccfsm_state = 1;
22619 succeed = TRUE;
22621 else
22622 fail = TRUE;
22623 break;
22625 case BARRIER:
22626 /* Succeed if the following insn is the target label.
22627 Otherwise fail.
22628 If return insns are used then the last insn in a function
22629 will be a barrier. */
22630 this_insn = next_nonnote_insn (this_insn);
22631 if (this_insn && this_insn == label)
22633 arm_ccfsm_state = 1;
22634 succeed = TRUE;
22636 else
22637 fail = TRUE;
22638 break;
22640 case CALL_INSN:
22641 /* The AAPCS says that conditional calls should not be
22642 used since they make interworking inefficient (the
22643 linker can't transform BL<cond> into BLX). That's
22644 only a problem if the machine has BLX. */
22645 if (arm_arch5)
22647 fail = TRUE;
22648 break;
22651 /* Succeed if the following insn is the target label, or
22652 if the following two insns are a barrier and the
22653 target label. */
22654 this_insn = next_nonnote_insn (this_insn);
22655 if (this_insn && BARRIER_P (this_insn))
22656 this_insn = next_nonnote_insn (this_insn);
22658 if (this_insn && this_insn == label
22659 && insns_skipped < max_insns_skipped)
22661 arm_ccfsm_state = 1;
22662 succeed = TRUE;
22664 else
22665 fail = TRUE;
22666 break;
22668 case JUMP_INSN:
22669 /* If this is an unconditional branch to the same label, succeed.
22670 If it is to another label, do nothing. If it is conditional,
22671 fail. */
22672 /* XXX Probably, the tests for SET and the PC are
22673 unnecessary. */
22675 scanbody = PATTERN (this_insn);
22676 if (GET_CODE (scanbody) == SET
22677 && GET_CODE (SET_DEST (scanbody)) == PC)
22679 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22680 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22682 arm_ccfsm_state = 2;
22683 succeed = TRUE;
22685 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22686 fail = TRUE;
22688 /* Fail if a conditional return is undesirable (e.g. on a
22689 StrongARM), but still allow this if optimizing for size. */
22690 else if (GET_CODE (scanbody) == return_code
22691 && !use_return_insn (TRUE, NULL)
22692 && !optimize_size)
22693 fail = TRUE;
22694 else if (GET_CODE (scanbody) == return_code)
22696 arm_ccfsm_state = 2;
22697 succeed = TRUE;
22699 else if (GET_CODE (scanbody) == PARALLEL)
22701 switch (get_attr_conds (this_insn))
22703 case CONDS_NOCOND:
22704 break;
22705 default:
22706 fail = TRUE;
22707 break;
22710 else
22711 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22713 break;
22715 case INSN:
22716 /* Instructions using or affecting the condition codes make it
22717 fail. */
22718 scanbody = PATTERN (this_insn);
22719 if (!(GET_CODE (scanbody) == SET
22720 || GET_CODE (scanbody) == PARALLEL)
22721 || get_attr_conds (this_insn) != CONDS_NOCOND)
22722 fail = TRUE;
22723 break;
22725 default:
22726 break;
22729 if (succeed)
22731 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22732 arm_target_label = CODE_LABEL_NUMBER (label);
22733 else
22735 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22737 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22739 this_insn = next_nonnote_insn (this_insn);
22740 gcc_assert (!this_insn
22741 || (!BARRIER_P (this_insn)
22742 && !LABEL_P (this_insn)));
22744 if (!this_insn)
22746 /* Oh dear! We ran off the end; give up. */
22747 extract_constrain_insn_cached (insn);
22748 arm_ccfsm_state = 0;
22749 arm_target_insn = NULL;
22750 return;
22752 arm_target_insn = this_insn;
22755 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22756 what it was. */
22757 if (!reverse)
22758 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22760 if (reverse || then_not_else)
22761 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22764 /* Restore recog_data (getting the attributes of other insns can
22765 destroy this array, but final.c assumes that it remains intact
22766 across this call). */
22767 extract_constrain_insn_cached (insn);
22771 /* Output IT instructions. */
22772 void
22773 thumb2_asm_output_opcode (FILE * stream)
22775 char buff[5];
22776 int n;
22778 if (arm_condexec_mask)
22780 for (n = 0; n < arm_condexec_masklen; n++)
22781 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22782 buff[n] = 0;
22783 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22784 arm_condition_codes[arm_current_cc]);
22785 arm_condexec_mask = 0;
22789 /* Returns true if REGNO is a valid register
22790 for holding a quantity of type MODE. */
22791 int
22792 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22794 if (GET_MODE_CLASS (mode) == MODE_CC)
22795 return (regno == CC_REGNUM
22796 || (TARGET_HARD_FLOAT && TARGET_VFP
22797 && regno == VFPCC_REGNUM));
22799 if (TARGET_THUMB1)
22800 /* For the Thumb we only allow values bigger than SImode in
22801 registers 0 - 6, so that there is always a second low
22802 register available to hold the upper part of the value.
22803 We probably ought to ensure that the register is the
22804 start of an even numbered register pair. */
22805 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22807 if (TARGET_HARD_FLOAT && TARGET_VFP
22808 && IS_VFP_REGNUM (regno))
22810 if (mode == SFmode || mode == SImode)
22811 return VFP_REGNO_OK_FOR_SINGLE (regno);
22813 if (mode == DFmode)
22814 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22816 /* VFP registers can hold HFmode values, but there is no point in
22817 putting them there unless we have hardware conversion insns. */
22818 if (mode == HFmode)
22819 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22821 if (TARGET_NEON)
22822 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22823 || (VALID_NEON_QREG_MODE (mode)
22824 && NEON_REGNO_OK_FOR_QUAD (regno))
22825 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22826 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22827 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22828 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22829 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22831 return FALSE;
22834 if (TARGET_REALLY_IWMMXT)
22836 if (IS_IWMMXT_GR_REGNUM (regno))
22837 return mode == SImode;
22839 if (IS_IWMMXT_REGNUM (regno))
22840 return VALID_IWMMXT_REG_MODE (mode);
22843 /* We allow almost any value to be stored in the general registers.
22844 Restrict doubleword quantities to even register pairs in ARM state
22845 so that we can use ldrd. Do not allow very large Neon structure
22846 opaque modes in general registers; they would use too many. */
22847 if (regno <= LAST_ARM_REGNUM)
22849 if (ARM_NUM_REGS (mode) > 4)
22850 return FALSE;
22852 if (TARGET_THUMB2)
22853 return TRUE;
22855 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
22858 if (regno == FRAME_POINTER_REGNUM
22859 || regno == ARG_POINTER_REGNUM)
22860 /* We only allow integers in the fake hard registers. */
22861 return GET_MODE_CLASS (mode) == MODE_INT;
22863 return FALSE;
22866 /* Implement MODES_TIEABLE_P. */
22868 bool
22869 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22871 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22872 return true;
22874 /* We specifically want to allow elements of "structure" modes to
22875 be tieable to the structure. This more general condition allows
22876 other rarer situations too. */
22877 if (TARGET_NEON
22878 && (VALID_NEON_DREG_MODE (mode1)
22879 || VALID_NEON_QREG_MODE (mode1)
22880 || VALID_NEON_STRUCT_MODE (mode1))
22881 && (VALID_NEON_DREG_MODE (mode2)
22882 || VALID_NEON_QREG_MODE (mode2)
22883 || VALID_NEON_STRUCT_MODE (mode2)))
22884 return true;
22886 return false;
22889 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22890 not used in arm mode. */
22892 enum reg_class
22893 arm_regno_class (int regno)
22895 if (TARGET_THUMB1)
22897 if (regno == STACK_POINTER_REGNUM)
22898 return STACK_REG;
22899 if (regno == CC_REGNUM)
22900 return CC_REG;
22901 if (regno < 8)
22902 return LO_REGS;
22903 return HI_REGS;
22906 if (TARGET_THUMB2 && regno < 8)
22907 return LO_REGS;
22909 if ( regno <= LAST_ARM_REGNUM
22910 || regno == FRAME_POINTER_REGNUM
22911 || regno == ARG_POINTER_REGNUM)
22912 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22914 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22915 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22917 if (IS_VFP_REGNUM (regno))
22919 if (regno <= D7_VFP_REGNUM)
22920 return VFP_D0_D7_REGS;
22921 else if (regno <= LAST_LO_VFP_REGNUM)
22922 return VFP_LO_REGS;
22923 else
22924 return VFP_HI_REGS;
22927 if (IS_IWMMXT_REGNUM (regno))
22928 return IWMMXT_REGS;
22930 if (IS_IWMMXT_GR_REGNUM (regno))
22931 return IWMMXT_GR_REGS;
22933 return NO_REGS;
22936 /* Handle a special case when computing the offset
22937 of an argument from the frame pointer. */
22938 int
22939 arm_debugger_arg_offset (int value, rtx addr)
22941 rtx insn;
22943 /* We are only interested if dbxout_parms() failed to compute the offset. */
22944 if (value != 0)
22945 return 0;
22947 /* We can only cope with the case where the address is held in a register. */
22948 if (!REG_P (addr))
22949 return 0;
22951 /* If we are using the frame pointer to point at the argument, then
22952 an offset of 0 is correct. */
22953 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22954 return 0;
22956 /* If we are using the stack pointer to point at the
22957 argument, then an offset of 0 is correct. */
22958 /* ??? Check this is consistent with thumb2 frame layout. */
22959 if ((TARGET_THUMB || !frame_pointer_needed)
22960 && REGNO (addr) == SP_REGNUM)
22961 return 0;
22963 /* Oh dear. The argument is pointed to by a register rather
22964 than being held in a register, or being stored at a known
22965 offset from the frame pointer. Since GDB only understands
22966 those two kinds of argument we must translate the address
22967 held in the register into an offset from the frame pointer.
22968 We do this by searching through the insns for the function
22969 looking to see where this register gets its value. If the
22970 register is initialized from the frame pointer plus an offset
22971 then we are in luck and we can continue, otherwise we give up.
22973 This code is exercised by producing debugging information
22974 for a function with arguments like this:
22976 double func (double a, double b, int c, double d) {return d;}
22978 Without this code the stab for parameter 'd' will be set to
22979 an offset of 0 from the frame pointer, rather than 8. */
22981 /* The if() statement says:
22983 If the insn is a normal instruction
22984 and if the insn is setting the value in a register
22985 and if the register being set is the register holding the address of the argument
22986 and if the address is computed by an addition
22987 that involves adding to a register
22988 which is the frame pointer
22989 a constant integer
22991 then... */
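/* In rtl terms the loop below looks for an insn of the shape
   (set (reg Rn) (plus (reg fp) (const_int OFFSET))), where Rn is the
   register holding the argument's address; OFFSET then becomes the value
   reported for the argument (an illustrative restatement of the
   conditions listed above).  */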
22993 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22995 if ( NONJUMP_INSN_P (insn)
22996 && GET_CODE (PATTERN (insn)) == SET
22997 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
22998 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
22999 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23000 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23001 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23004 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23006 break;
23010 if (value == 0)
23012 debug_rtx (addr);
23013 warning (0, "unable to compute real location of stacked parameter");
23014 value = 8; /* XXX magic hack */
23017 return value;
23020 typedef enum {
23021 T_V8QI,
23022 T_V4HI,
23023 T_V4HF,
23024 T_V2SI,
23025 T_V2SF,
23026 T_DI,
23027 T_V16QI,
23028 T_V8HI,
23029 T_V4SI,
23030 T_V4SF,
23031 T_V2DI,
23032 T_TI,
23033 T_EI,
23034 T_OI,
23035 T_MAX /* Size of enum. Keep last. */
23036 } neon_builtin_type_mode;
23038 #define TYPE_MODE_BIT(X) (1 << (X))
23040 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
23041 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
23042 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
23043 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
23044 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
23045 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
23047 #define v8qi_UP T_V8QI
23048 #define v4hi_UP T_V4HI
23049 #define v4hf_UP T_V4HF
23050 #define v2si_UP T_V2SI
23051 #define v2sf_UP T_V2SF
23052 #define di_UP T_DI
23053 #define v16qi_UP T_V16QI
23054 #define v8hi_UP T_V8HI
23055 #define v4si_UP T_V4SI
23056 #define v4sf_UP T_V4SF
23057 #define v2di_UP T_V2DI
23058 #define ti_UP T_TI
23059 #define ei_UP T_EI
23060 #define oi_UP T_OI
23062 #define UP(X) X##_UP
23064 typedef enum {
23065 NEON_BINOP,
23066 NEON_TERNOP,
23067 NEON_UNOP,
23068 NEON_GETLANE,
23069 NEON_SETLANE,
23070 NEON_CREATE,
23071 NEON_RINT,
23072 NEON_DUP,
23073 NEON_DUPLANE,
23074 NEON_COMBINE,
23075 NEON_SPLIT,
23076 NEON_LANEMUL,
23077 NEON_LANEMULL,
23078 NEON_LANEMULH,
23079 NEON_LANEMAC,
23080 NEON_SCALARMUL,
23081 NEON_SCALARMULL,
23082 NEON_SCALARMULH,
23083 NEON_SCALARMAC,
23084 NEON_CONVERT,
23085 NEON_FLOAT_WIDEN,
23086 NEON_FLOAT_NARROW,
23087 NEON_FIXCONV,
23088 NEON_SELECT,
23089 NEON_REINTERP,
23090 NEON_VTBL,
23091 NEON_VTBX,
23092 NEON_LOAD1,
23093 NEON_LOAD1LANE,
23094 NEON_STORE1,
23095 NEON_STORE1LANE,
23096 NEON_LOADSTRUCT,
23097 NEON_LOADSTRUCTLANE,
23098 NEON_STORESTRUCT,
23099 NEON_STORESTRUCTLANE,
23100 NEON_LOGICBINOP,
23101 NEON_SHIFTINSERT,
23102 NEON_SHIFTIMM,
23103 NEON_SHIFTACC
23104 } neon_itype;
23106 typedef struct {
23107 const char *name;
23108 const neon_itype itype;
23109 const neon_builtin_type_mode mode;
23110 const enum insn_code code;
23111 unsigned int fcode;
23112 } neon_builtin_datum;
23114 #define CF(N,X) CODE_FOR_neon_##N##X
23116 #define VAR1(T, N, A) \
23117 {#N, NEON_##T, UP (A), CF (N, A), 0}
23118 #define VAR2(T, N, A, B) \
23119 VAR1 (T, N, A), \
23120 {#N, NEON_##T, UP (B), CF (N, B), 0}
23121 #define VAR3(T, N, A, B, C) \
23122 VAR2 (T, N, A, B), \
23123 {#N, NEON_##T, UP (C), CF (N, C), 0}
23124 #define VAR4(T, N, A, B, C, D) \
23125 VAR3 (T, N, A, B, C), \
23126 {#N, NEON_##T, UP (D), CF (N, D), 0}
23127 #define VAR5(T, N, A, B, C, D, E) \
23128 VAR4 (T, N, A, B, C, D), \
23129 {#N, NEON_##T, UP (E), CF (N, E), 0}
23130 #define VAR6(T, N, A, B, C, D, E, F) \
23131 VAR5 (T, N, A, B, C, D, E), \
23132 {#N, NEON_##T, UP (F), CF (N, F), 0}
23133 #define VAR7(T, N, A, B, C, D, E, F, G) \
23134 VAR6 (T, N, A, B, C, D, E, F), \
23135 {#N, NEON_##T, UP (G), CF (N, G), 0}
23136 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23137 VAR7 (T, N, A, B, C, D, E, F, G), \
23138 {#N, NEON_##T, UP (H), CF (N, H), 0}
23139 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23140 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23141 {#N, NEON_##T, UP (I), CF (N, I), 0}
23142 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23143 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23144 {#N, NEON_##T, UP (J), CF (N, J), 0}
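/* As a hypothetical illustration of these macros (this exact entry need
   not appear in arm_neon_builtins.def):
     VAR2 (BINOP, vadd, v2sf, v4sf)
   expands to
     {"vadd", NEON_BINOP, T_V2SF, CODE_FOR_neon_vaddv2sf, 0},
     {"vadd", NEON_BINOP, T_V4SF, CODE_FOR_neon_vaddv4sf, 0}
   with the trailing fcode field left as 0 here and assigned when the
   builtins are initialized.  */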
23146 /* The NEON builtin data can be found in arm_neon_builtins.def.
23147 The mode entries in the following table correspond to the "key" type of the
23148 instruction variant, i.e. equivalent to that which would be specified after
23149 the assembler mnemonic, which usually refers to the last vector operand.
23150 (Signed/unsigned/polynomial types are not differentiated between though, and
23151 are all mapped onto the same mode for a given element size.) The modes
23152 listed per instruction should be the same as those defined for that
23153 instruction's pattern in neon.md. */
23155 static neon_builtin_datum neon_builtin_data[] =
23157 #include "arm_neon_builtins.def"
23160 #undef CF
23161 #undef VAR1
23162 #undef VAR2
23163 #undef VAR3
23164 #undef VAR4
23165 #undef VAR5
23166 #undef VAR6
23167 #undef VAR7
23168 #undef VAR8
23169 #undef VAR9
23170 #undef VAR10
23172 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
23173 #define VAR1(T, N, A) \
23174 CF (N, A)
23175 #define VAR2(T, N, A, B) \
23176 VAR1 (T, N, A), \
23177 CF (N, B)
23178 #define VAR3(T, N, A, B, C) \
23179 VAR2 (T, N, A, B), \
23180 CF (N, C)
23181 #define VAR4(T, N, A, B, C, D) \
23182 VAR3 (T, N, A, B, C), \
23183 CF (N, D)
23184 #define VAR5(T, N, A, B, C, D, E) \
23185 VAR4 (T, N, A, B, C, D), \
23186 CF (N, E)
23187 #define VAR6(T, N, A, B, C, D, E, F) \
23188 VAR5 (T, N, A, B, C, D, E), \
23189 CF (N, F)
23190 #define VAR7(T, N, A, B, C, D, E, F, G) \
23191 VAR6 (T, N, A, B, C, D, E, F), \
23192 CF (N, G)
23193 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
23194 VAR7 (T, N, A, B, C, D, E, F, G), \
23195 CF (N, H)
23196 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
23197 VAR8 (T, N, A, B, C, D, E, F, G, H), \
23198 CF (N, I)
23199 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
23200 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
23201 CF (N, J)
23202 enum arm_builtins
23204 ARM_BUILTIN_GETWCGR0,
23205 ARM_BUILTIN_GETWCGR1,
23206 ARM_BUILTIN_GETWCGR2,
23207 ARM_BUILTIN_GETWCGR3,
23209 ARM_BUILTIN_SETWCGR0,
23210 ARM_BUILTIN_SETWCGR1,
23211 ARM_BUILTIN_SETWCGR2,
23212 ARM_BUILTIN_SETWCGR3,
23214 ARM_BUILTIN_WZERO,
23216 ARM_BUILTIN_WAVG2BR,
23217 ARM_BUILTIN_WAVG2HR,
23218 ARM_BUILTIN_WAVG2B,
23219 ARM_BUILTIN_WAVG2H,
23221 ARM_BUILTIN_WACCB,
23222 ARM_BUILTIN_WACCH,
23223 ARM_BUILTIN_WACCW,
23225 ARM_BUILTIN_WMACS,
23226 ARM_BUILTIN_WMACSZ,
23227 ARM_BUILTIN_WMACU,
23228 ARM_BUILTIN_WMACUZ,
23230 ARM_BUILTIN_WSADB,
23231 ARM_BUILTIN_WSADBZ,
23232 ARM_BUILTIN_WSADH,
23233 ARM_BUILTIN_WSADHZ,
23235 ARM_BUILTIN_WALIGNI,
23236 ARM_BUILTIN_WALIGNR0,
23237 ARM_BUILTIN_WALIGNR1,
23238 ARM_BUILTIN_WALIGNR2,
23239 ARM_BUILTIN_WALIGNR3,
23241 ARM_BUILTIN_TMIA,
23242 ARM_BUILTIN_TMIAPH,
23243 ARM_BUILTIN_TMIABB,
23244 ARM_BUILTIN_TMIABT,
23245 ARM_BUILTIN_TMIATB,
23246 ARM_BUILTIN_TMIATT,
23248 ARM_BUILTIN_TMOVMSKB,
23249 ARM_BUILTIN_TMOVMSKH,
23250 ARM_BUILTIN_TMOVMSKW,
23252 ARM_BUILTIN_TBCSTB,
23253 ARM_BUILTIN_TBCSTH,
23254 ARM_BUILTIN_TBCSTW,
23256 ARM_BUILTIN_WMADDS,
23257 ARM_BUILTIN_WMADDU,
23259 ARM_BUILTIN_WPACKHSS,
23260 ARM_BUILTIN_WPACKWSS,
23261 ARM_BUILTIN_WPACKDSS,
23262 ARM_BUILTIN_WPACKHUS,
23263 ARM_BUILTIN_WPACKWUS,
23264 ARM_BUILTIN_WPACKDUS,
23266 ARM_BUILTIN_WADDB,
23267 ARM_BUILTIN_WADDH,
23268 ARM_BUILTIN_WADDW,
23269 ARM_BUILTIN_WADDSSB,
23270 ARM_BUILTIN_WADDSSH,
23271 ARM_BUILTIN_WADDSSW,
23272 ARM_BUILTIN_WADDUSB,
23273 ARM_BUILTIN_WADDUSH,
23274 ARM_BUILTIN_WADDUSW,
23275 ARM_BUILTIN_WSUBB,
23276 ARM_BUILTIN_WSUBH,
23277 ARM_BUILTIN_WSUBW,
23278 ARM_BUILTIN_WSUBSSB,
23279 ARM_BUILTIN_WSUBSSH,
23280 ARM_BUILTIN_WSUBSSW,
23281 ARM_BUILTIN_WSUBUSB,
23282 ARM_BUILTIN_WSUBUSH,
23283 ARM_BUILTIN_WSUBUSW,
23285 ARM_BUILTIN_WAND,
23286 ARM_BUILTIN_WANDN,
23287 ARM_BUILTIN_WOR,
23288 ARM_BUILTIN_WXOR,
23290 ARM_BUILTIN_WCMPEQB,
23291 ARM_BUILTIN_WCMPEQH,
23292 ARM_BUILTIN_WCMPEQW,
23293 ARM_BUILTIN_WCMPGTUB,
23294 ARM_BUILTIN_WCMPGTUH,
23295 ARM_BUILTIN_WCMPGTUW,
23296 ARM_BUILTIN_WCMPGTSB,
23297 ARM_BUILTIN_WCMPGTSH,
23298 ARM_BUILTIN_WCMPGTSW,
23300 ARM_BUILTIN_TEXTRMSB,
23301 ARM_BUILTIN_TEXTRMSH,
23302 ARM_BUILTIN_TEXTRMSW,
23303 ARM_BUILTIN_TEXTRMUB,
23304 ARM_BUILTIN_TEXTRMUH,
23305 ARM_BUILTIN_TEXTRMUW,
23306 ARM_BUILTIN_TINSRB,
23307 ARM_BUILTIN_TINSRH,
23308 ARM_BUILTIN_TINSRW,
23310 ARM_BUILTIN_WMAXSW,
23311 ARM_BUILTIN_WMAXSH,
23312 ARM_BUILTIN_WMAXSB,
23313 ARM_BUILTIN_WMAXUW,
23314 ARM_BUILTIN_WMAXUH,
23315 ARM_BUILTIN_WMAXUB,
23316 ARM_BUILTIN_WMINSW,
23317 ARM_BUILTIN_WMINSH,
23318 ARM_BUILTIN_WMINSB,
23319 ARM_BUILTIN_WMINUW,
23320 ARM_BUILTIN_WMINUH,
23321 ARM_BUILTIN_WMINUB,
23323 ARM_BUILTIN_WMULUM,
23324 ARM_BUILTIN_WMULSM,
23325 ARM_BUILTIN_WMULUL,
23327 ARM_BUILTIN_PSADBH,
23328 ARM_BUILTIN_WSHUFH,
23330 ARM_BUILTIN_WSLLH,
23331 ARM_BUILTIN_WSLLW,
23332 ARM_BUILTIN_WSLLD,
23333 ARM_BUILTIN_WSRAH,
23334 ARM_BUILTIN_WSRAW,
23335 ARM_BUILTIN_WSRAD,
23336 ARM_BUILTIN_WSRLH,
23337 ARM_BUILTIN_WSRLW,
23338 ARM_BUILTIN_WSRLD,
23339 ARM_BUILTIN_WRORH,
23340 ARM_BUILTIN_WRORW,
23341 ARM_BUILTIN_WRORD,
23342 ARM_BUILTIN_WSLLHI,
23343 ARM_BUILTIN_WSLLWI,
23344 ARM_BUILTIN_WSLLDI,
23345 ARM_BUILTIN_WSRAHI,
23346 ARM_BUILTIN_WSRAWI,
23347 ARM_BUILTIN_WSRADI,
23348 ARM_BUILTIN_WSRLHI,
23349 ARM_BUILTIN_WSRLWI,
23350 ARM_BUILTIN_WSRLDI,
23351 ARM_BUILTIN_WRORHI,
23352 ARM_BUILTIN_WRORWI,
23353 ARM_BUILTIN_WRORDI,
23355 ARM_BUILTIN_WUNPCKIHB,
23356 ARM_BUILTIN_WUNPCKIHH,
23357 ARM_BUILTIN_WUNPCKIHW,
23358 ARM_BUILTIN_WUNPCKILB,
23359 ARM_BUILTIN_WUNPCKILH,
23360 ARM_BUILTIN_WUNPCKILW,
23362 ARM_BUILTIN_WUNPCKEHSB,
23363 ARM_BUILTIN_WUNPCKEHSH,
23364 ARM_BUILTIN_WUNPCKEHSW,
23365 ARM_BUILTIN_WUNPCKEHUB,
23366 ARM_BUILTIN_WUNPCKEHUH,
23367 ARM_BUILTIN_WUNPCKEHUW,
23368 ARM_BUILTIN_WUNPCKELSB,
23369 ARM_BUILTIN_WUNPCKELSH,
23370 ARM_BUILTIN_WUNPCKELSW,
23371 ARM_BUILTIN_WUNPCKELUB,
23372 ARM_BUILTIN_WUNPCKELUH,
23373 ARM_BUILTIN_WUNPCKELUW,
23375 ARM_BUILTIN_WABSB,
23376 ARM_BUILTIN_WABSH,
23377 ARM_BUILTIN_WABSW,
23379 ARM_BUILTIN_WADDSUBHX,
23380 ARM_BUILTIN_WSUBADDHX,
23382 ARM_BUILTIN_WABSDIFFB,
23383 ARM_BUILTIN_WABSDIFFH,
23384 ARM_BUILTIN_WABSDIFFW,
23386 ARM_BUILTIN_WADDCH,
23387 ARM_BUILTIN_WADDCW,
23389 ARM_BUILTIN_WAVG4,
23390 ARM_BUILTIN_WAVG4R,
23392 ARM_BUILTIN_WMADDSX,
23393 ARM_BUILTIN_WMADDUX,
23395 ARM_BUILTIN_WMADDSN,
23396 ARM_BUILTIN_WMADDUN,
23398 ARM_BUILTIN_WMULWSM,
23399 ARM_BUILTIN_WMULWUM,
23401 ARM_BUILTIN_WMULWSMR,
23402 ARM_BUILTIN_WMULWUMR,
23404 ARM_BUILTIN_WMULWL,
23406 ARM_BUILTIN_WMULSMR,
23407 ARM_BUILTIN_WMULUMR,
23409 ARM_BUILTIN_WQMULM,
23410 ARM_BUILTIN_WQMULMR,
23412 ARM_BUILTIN_WQMULWM,
23413 ARM_BUILTIN_WQMULWMR,
23415 ARM_BUILTIN_WADDBHUSM,
23416 ARM_BUILTIN_WADDBHUSL,
23418 ARM_BUILTIN_WQMIABB,
23419 ARM_BUILTIN_WQMIABT,
23420 ARM_BUILTIN_WQMIATB,
23421 ARM_BUILTIN_WQMIATT,
23423 ARM_BUILTIN_WQMIABBN,
23424 ARM_BUILTIN_WQMIABTN,
23425 ARM_BUILTIN_WQMIATBN,
23426 ARM_BUILTIN_WQMIATTN,
23428 ARM_BUILTIN_WMIABB,
23429 ARM_BUILTIN_WMIABT,
23430 ARM_BUILTIN_WMIATB,
23431 ARM_BUILTIN_WMIATT,
23433 ARM_BUILTIN_WMIABBN,
23434 ARM_BUILTIN_WMIABTN,
23435 ARM_BUILTIN_WMIATBN,
23436 ARM_BUILTIN_WMIATTN,
23438 ARM_BUILTIN_WMIAWBB,
23439 ARM_BUILTIN_WMIAWBT,
23440 ARM_BUILTIN_WMIAWTB,
23441 ARM_BUILTIN_WMIAWTT,
23443 ARM_BUILTIN_WMIAWBBN,
23444 ARM_BUILTIN_WMIAWBTN,
23445 ARM_BUILTIN_WMIAWTBN,
23446 ARM_BUILTIN_WMIAWTTN,
23448 ARM_BUILTIN_WMERGE,
23450 ARM_BUILTIN_CRC32B,
23451 ARM_BUILTIN_CRC32H,
23452 ARM_BUILTIN_CRC32W,
23453 ARM_BUILTIN_CRC32CB,
23454 ARM_BUILTIN_CRC32CH,
23455 ARM_BUILTIN_CRC32CW,
23457 #undef CRYPTO1
23458 #undef CRYPTO2
23459 #undef CRYPTO3
23461 #define CRYPTO1(L, U, M1, M2) \
23462 ARM_BUILTIN_CRYPTO_##U,
23463 #define CRYPTO2(L, U, M1, M2, M3) \
23464 ARM_BUILTIN_CRYPTO_##U,
23465 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23466 ARM_BUILTIN_CRYPTO_##U,
23468 #include "crypto.def"
23470 #undef CRYPTO1
23471 #undef CRYPTO2
23472 #undef CRYPTO3
23474 #include "arm_neon_builtins.def"
23476 ,ARM_BUILTIN_MAX
23479 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23481 #undef CF
23482 #undef VAR1
23483 #undef VAR2
23484 #undef VAR3
23485 #undef VAR4
23486 #undef VAR5
23487 #undef VAR6
23488 #undef VAR7
23489 #undef VAR8
23490 #undef VAR9
23491 #undef VAR10
23493 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23495 #define NUM_DREG_TYPES 5
23496 #define NUM_QREG_TYPES 6
23498 static void
23499 arm_init_neon_builtins (void)
23501 unsigned int i, fcode;
23502 tree decl;
23504 tree neon_intQI_type_node;
23505 tree neon_intHI_type_node;
23506 tree neon_floatHF_type_node;
23507 tree neon_polyQI_type_node;
23508 tree neon_polyHI_type_node;
23509 tree neon_intSI_type_node;
23510 tree neon_intDI_type_node;
23511 tree neon_intUTI_type_node;
23512 tree neon_float_type_node;
23514 tree intQI_pointer_node;
23515 tree intHI_pointer_node;
23516 tree intSI_pointer_node;
23517 tree intDI_pointer_node;
23518 tree float_pointer_node;
23520 tree const_intQI_node;
23521 tree const_intHI_node;
23522 tree const_intSI_node;
23523 tree const_intDI_node;
23524 tree const_float_node;
23526 tree const_intQI_pointer_node;
23527 tree const_intHI_pointer_node;
23528 tree const_intSI_pointer_node;
23529 tree const_intDI_pointer_node;
23530 tree const_float_pointer_node;
23532 tree V8QI_type_node;
23533 tree V4HI_type_node;
23534 tree V4HF_type_node;
23535 tree V2SI_type_node;
23536 tree V2SF_type_node;
23537 tree V16QI_type_node;
23538 tree V8HI_type_node;
23539 tree V4SI_type_node;
23540 tree V4SF_type_node;
23541 tree V2DI_type_node;
23543 tree intUQI_type_node;
23544 tree intUHI_type_node;
23545 tree intUSI_type_node;
23546 tree intUDI_type_node;
23548 tree intEI_type_node;
23549 tree intOI_type_node;
23550 tree intCI_type_node;
23551 tree intXI_type_node;
23553 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23554 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23555 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23557 /* Create distinguished type nodes for NEON vector element types,
23558 and pointers to values of such types, so we can detect them later. */
23559 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23560 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23561 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23562 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23563 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23564 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23565 neon_float_type_node = make_node (REAL_TYPE);
23566 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23567 layout_type (neon_float_type_node);
23568 neon_floatHF_type_node = make_node (REAL_TYPE);
23569 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23570 layout_type (neon_floatHF_type_node);
23572 /* Define typedefs which exactly correspond to the modes we are basing vector
23573 types on. If you change these names you'll need to change
23574 the table used by arm_mangle_type too. */
23575 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23576 "__builtin_neon_qi");
23577 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23578 "__builtin_neon_hi");
23579 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23580 "__builtin_neon_hf");
23581 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23582 "__builtin_neon_si");
23583 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23584 "__builtin_neon_sf");
23585 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23586 "__builtin_neon_di");
23587 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23588 "__builtin_neon_poly8");
23589 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23590 "__builtin_neon_poly16");
23592 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23593 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23594 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23595 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23596 float_pointer_node = build_pointer_type (neon_float_type_node);
23598 /* Next create constant-qualified versions of the above types. */
23599 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23600 TYPE_QUAL_CONST);
23601 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23602 TYPE_QUAL_CONST);
23603 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23604 TYPE_QUAL_CONST);
23605 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23606 TYPE_QUAL_CONST);
23607 const_float_node = build_qualified_type (neon_float_type_node,
23608 TYPE_QUAL_CONST);
23610 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23611 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23612 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23613 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23614 const_float_pointer_node = build_pointer_type (const_float_node);
23616 /* Now create vector types based on our NEON element types. */
23617 /* 64-bit vectors. */
23618 V8QI_type_node =
23619 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23620 V4HI_type_node =
23621 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23622 V4HF_type_node =
23623 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23624 V2SI_type_node =
23625 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23626 V2SF_type_node =
23627 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23628 /* 128-bit vectors. */
23629 V16QI_type_node =
23630 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23631 V8HI_type_node =
23632 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23633 V4SI_type_node =
23634 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23635 V4SF_type_node =
23636 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23637 V2DI_type_node =
23638 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23640 /* Unsigned integer types for various mode sizes. */
23641 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23642 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23643 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23644 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23645 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23648 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23649 "__builtin_neon_uqi");
23650 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23651 "__builtin_neon_uhi");
23652 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23653 "__builtin_neon_usi");
23654 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23655 "__builtin_neon_udi");
23656 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23657 "__builtin_neon_poly64");
23658 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23659 "__builtin_neon_poly128");
23661 /* Opaque integer types for structures of vectors. */
23662 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23663 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23664 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23665 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23667 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23668 "__builtin_neon_ti");
23669 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23670 "__builtin_neon_ei");
23671 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23672 "__builtin_neon_oi");
23673 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23674 "__builtin_neon_ci");
23675 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23676 "__builtin_neon_xi");
23678 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23680 tree V4USI_type_node =
23681 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23683 tree V16UQI_type_node =
23684 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23686 tree v16uqi_ftype_v16uqi
23687 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23689 tree v16uqi_ftype_v16uqi_v16uqi
23690 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23691 V16UQI_type_node, NULL_TREE);
23693 tree v4usi_ftype_v4usi
23694 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23696 tree v4usi_ftype_v4usi_v4usi
23697 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23698 V4USI_type_node, NULL_TREE);
23700 tree v4usi_ftype_v4usi_v4usi_v4usi
23701 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23702 V4USI_type_node, V4USI_type_node, NULL_TREE);
23704 tree uti_ftype_udi_udi
23705 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23706 intUDI_type_node, NULL_TREE);
23708 #undef CRYPTO1
23709 #undef CRYPTO2
23710 #undef CRYPTO3
23711 #undef C
23712 #undef N
23713 #undef CF
23714 #undef FT1
23715 #undef FT2
23716 #undef FT3
23718 #define C(U) \
23719 ARM_BUILTIN_CRYPTO_##U
23720 #define N(L) \
23721 "__builtin_arm_crypto_"#L
23722 #define FT1(R, A) \
23723 R##_ftype_##A
23724 #define FT2(R, A1, A2) \
23725 R##_ftype_##A1##_##A2
23726 #define FT3(R, A1, A2, A3) \
23727 R##_ftype_##A1##_##A2##_##A3
23728 #define CRYPTO1(L, U, R, A) \
23729 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23730 C (U), BUILT_IN_MD, \
23731 NULL, NULL_TREE);
23732 #define CRYPTO2(L, U, R, A1, A2) \
23733 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23734 C (U), BUILT_IN_MD, \
23735 NULL, NULL_TREE);
23737 #define CRYPTO3(L, U, R, A1, A2, A3) \
23738 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23739 C (U), BUILT_IN_MD, \
23740 NULL, NULL_TREE);
23741 #include "crypto.def"
23743 #undef CRYPTO1
23744 #undef CRYPTO2
23745 #undef CRYPTO3
23746 #undef C
23747 #undef N
23748 #undef FT1
23749 #undef FT2
23750 #undef FT3
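/* To illustrate the macro machinery above, an entry such as
   CRYPTO2 (aesd, AESD, v16uqi, v16uqi, v16uqi) (shown here purely for
   illustration; the real entries live in crypto.def) would expand to

     arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESD]
       = add_builtin_function ("__builtin_arm_crypto_aesd",
                               v16uqi_ftype_v16uqi_v16uqi,
                               ARM_BUILTIN_CRYPTO_AESD, BUILT_IN_MD,
                               NULL, NULL_TREE);  */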
23752 dreg_types[0] = V8QI_type_node;
23753 dreg_types[1] = V4HI_type_node;
23754 dreg_types[2] = V2SI_type_node;
23755 dreg_types[3] = V2SF_type_node;
23756 dreg_types[4] = neon_intDI_type_node;
23758 qreg_types[0] = V16QI_type_node;
23759 qreg_types[1] = V8HI_type_node;
23760 qreg_types[2] = V4SI_type_node;
23761 qreg_types[3] = V4SF_type_node;
23762 qreg_types[4] = V2DI_type_node;
23763 qreg_types[5] = neon_intUTI_type_node;
23765 for (i = 0; i < NUM_QREG_TYPES; i++)
23767 int j;
23768 for (j = 0; j < NUM_QREG_TYPES; j++)
23770 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23771 reinterp_ftype_dreg[i][j]
23772 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23774 reinterp_ftype_qreg[i][j]
23775 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23779 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23780 i < ARRAY_SIZE (neon_builtin_data);
23781 i++, fcode++)
23783 neon_builtin_datum *d = &neon_builtin_data[i];
23785 const char* const modenames[] = {
23786 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23787 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23788 "ti", "ei", "oi"
23790 char namebuf[60];
23791 tree ftype = NULL;
23792 int is_load = 0, is_store = 0;
23794 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23796 d->fcode = fcode;
23798 switch (d->itype)
23800 case NEON_LOAD1:
23801 case NEON_LOAD1LANE:
23802 case NEON_LOADSTRUCT:
23803 case NEON_LOADSTRUCTLANE:
23804 is_load = 1;
23805 /* Fall through. */
23806 case NEON_STORE1:
23807 case NEON_STORE1LANE:
23808 case NEON_STORESTRUCT:
23809 case NEON_STORESTRUCTLANE:
23810 if (!is_load)
23811 is_store = 1;
23812 /* Fall through. */
23813 case NEON_UNOP:
23814 case NEON_RINT:
23815 case NEON_BINOP:
23816 case NEON_LOGICBINOP:
23817 case NEON_SHIFTINSERT:
23818 case NEON_TERNOP:
23819 case NEON_GETLANE:
23820 case NEON_SETLANE:
23821 case NEON_CREATE:
23822 case NEON_DUP:
23823 case NEON_DUPLANE:
23824 case NEON_SHIFTIMM:
23825 case NEON_SHIFTACC:
23826 case NEON_COMBINE:
23827 case NEON_SPLIT:
23828 case NEON_CONVERT:
23829 case NEON_FIXCONV:
23830 case NEON_LANEMUL:
23831 case NEON_LANEMULL:
23832 case NEON_LANEMULH:
23833 case NEON_LANEMAC:
23834 case NEON_SCALARMUL:
23835 case NEON_SCALARMULL:
23836 case NEON_SCALARMULH:
23837 case NEON_SCALARMAC:
23838 case NEON_SELECT:
23839 case NEON_VTBL:
23840 case NEON_VTBX:
23842 int k;
23843 tree return_type = void_type_node, args = void_list_node;
23845 /* Build a function type directly from the insn_data for
23846 this builtin. The build_function_type() function takes
23847 care of removing duplicates for us. */
23848 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23850 tree eltype;
23852 if (is_load && k == 1)
23854 /* Neon load patterns always have the memory
23855 operand in the operand 1 position. */
23856 gcc_assert (insn_data[d->code].operand[k].predicate
23857 == neon_struct_operand);
23859 switch (d->mode)
23861 case T_V8QI:
23862 case T_V16QI:
23863 eltype = const_intQI_pointer_node;
23864 break;
23866 case T_V4HI:
23867 case T_V8HI:
23868 eltype = const_intHI_pointer_node;
23869 break;
23871 case T_V2SI:
23872 case T_V4SI:
23873 eltype = const_intSI_pointer_node;
23874 break;
23876 case T_V2SF:
23877 case T_V4SF:
23878 eltype = const_float_pointer_node;
23879 break;
23881 case T_DI:
23882 case T_V2DI:
23883 eltype = const_intDI_pointer_node;
23884 break;
23886 default: gcc_unreachable ();
23889 else if (is_store && k == 0)
23891 /* Similarly, Neon store patterns use operand 0 as
23892 the memory location to store to. */
23893 gcc_assert (insn_data[d->code].operand[k].predicate
23894 == neon_struct_operand);
23896 switch (d->mode)
23898 case T_V8QI:
23899 case T_V16QI:
23900 eltype = intQI_pointer_node;
23901 break;
23903 case T_V4HI:
23904 case T_V8HI:
23905 eltype = intHI_pointer_node;
23906 break;
23908 case T_V2SI:
23909 case T_V4SI:
23910 eltype = intSI_pointer_node;
23911 break;
23913 case T_V2SF:
23914 case T_V4SF:
23915 eltype = float_pointer_node;
23916 break;
23918 case T_DI:
23919 case T_V2DI:
23920 eltype = intDI_pointer_node;
23921 break;
23923 default: gcc_unreachable ();
23926 else
23928 switch (insn_data[d->code].operand[k].mode)
23930 case VOIDmode: eltype = void_type_node; break;
23931 /* Scalars. */
23932 case QImode: eltype = neon_intQI_type_node; break;
23933 case HImode: eltype = neon_intHI_type_node; break;
23934 case SImode: eltype = neon_intSI_type_node; break;
23935 case SFmode: eltype = neon_float_type_node; break;
23936 case DImode: eltype = neon_intDI_type_node; break;
23937 case TImode: eltype = intTI_type_node; break;
23938 case EImode: eltype = intEI_type_node; break;
23939 case OImode: eltype = intOI_type_node; break;
23940 case CImode: eltype = intCI_type_node; break;
23941 case XImode: eltype = intXI_type_node; break;
23942 /* 64-bit vectors. */
23943 case V8QImode: eltype = V8QI_type_node; break;
23944 case V4HImode: eltype = V4HI_type_node; break;
23945 case V2SImode: eltype = V2SI_type_node; break;
23946 case V2SFmode: eltype = V2SF_type_node; break;
23947 /* 128-bit vectors. */
23948 case V16QImode: eltype = V16QI_type_node; break;
23949 case V8HImode: eltype = V8HI_type_node; break;
23950 case V4SImode: eltype = V4SI_type_node; break;
23951 case V4SFmode: eltype = V4SF_type_node; break;
23952 case V2DImode: eltype = V2DI_type_node; break;
23953 default: gcc_unreachable ();
23957 if (k == 0 && !is_store)
23958 return_type = eltype;
23959 else
23960 args = tree_cons (NULL_TREE, eltype, args);
23963 ftype = build_function_type (return_type, args);
23965 break;
23967 case NEON_REINTERP:
23969 /* We iterate over NUM_DREG_TYPES doubleword types,
23970 then NUM_QREG_TYPES quadword types.
23971 V4HF is not a type used in reinterpret, so we translate
23972 d->mode to the correct index in reinterp_ftype_dreg. */
23973 bool qreg_p
23974 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
23975 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
23976 % NUM_QREG_TYPES;
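/* Worked example of the index translation (illustrative): with the mode
   order used in modenames[] below, T_V4HF == 2 and T_V2SF == 4, so a
   doubleword source of mode T_V2SF maps to rhs = (4 - 1) % NUM_QREG_TYPES
   == 3, i.e. dreg_types[3] == V2SF_type_node, skipping the V4HF slot.  */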
23977 switch (insn_data[d->code].operand[0].mode)
23979 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
23980 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
23981 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
23982 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
23983 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
23984 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
23985 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
23986 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
23987 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
23988 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
23989 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
23990 default: gcc_unreachable ();
23993 break;
23994 case NEON_FLOAT_WIDEN:
23996 tree eltype = NULL_TREE;
23997 tree return_type = NULL_TREE;
23999 switch (insn_data[d->code].operand[1].mode)
24001 case V4HFmode:
24002 eltype = V4HF_type_node;
24003 return_type = V4SF_type_node;
24004 break;
24005 default: gcc_unreachable ();
24007 ftype = build_function_type_list (return_type, eltype, NULL);
24008 break;
24010 case NEON_FLOAT_NARROW:
24012 tree eltype = NULL_TREE;
24013 tree return_type = NULL_TREE;
24015 switch (insn_data[d->code].operand[1].mode)
24017 case V4SFmode:
24018 eltype = V4SF_type_node;
24019 return_type = V4HF_type_node;
24020 break;
24021 default: gcc_unreachable ();
24023 ftype = build_function_type_list (return_type, eltype, NULL);
24024 break;
24026 default:
24027 gcc_unreachable ();
24030 gcc_assert (ftype != NULL);
24032 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
24034 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
24035 NULL_TREE);
24036 arm_builtin_decls[fcode] = decl;
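/* Illustrative example of the naming scheme above: a hypothetical entry
   named "vadd" with mode T_V4SF would be registered as
   "__builtin_neon_vaddv4sf", which arm_neon.h could then call from an
   intrinsic such as vaddq_f32.  */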
24040 #undef NUM_DREG_TYPES
24041 #undef NUM_QREG_TYPES
24043 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
24044 do \
24046 if ((MASK) & insn_flags) \
24048 tree bdecl; \
24049 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
24050 BUILT_IN_MD, NULL, NULL_TREE); \
24051 arm_builtin_decls[CODE] = bdecl; \
24054 while (0)
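/* For example, an invocation such as

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);

   registers the builtin only when FL_IWMMXT is present in insn_flags
   (illustrative; the actual invocations appear below via the
   iwmmx_mbuiltin/iwmmx2_mbuiltin wrappers).  */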
24056 struct builtin_description
24058 const unsigned int mask;
24059 const enum insn_code icode;
24060 const char * const name;
24061 const enum arm_builtins code;
24062 const enum rtx_code comparison;
24063 const unsigned int flag;
24066 static const struct builtin_description bdesc_2arg[] =
24068 #define IWMMXT_BUILTIN(code, string, builtin) \
24069 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
24070 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24072 #define IWMMXT2_BUILTIN(code, string, builtin) \
24073 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
24074 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24076 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
24077 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
24078 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
24079 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
24080 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
24081 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
24082 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
24083 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
24084 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
24085 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
24086 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
24087 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
24088 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
24089 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
24090 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
24091 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
24092 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
24093 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
24094 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
24095 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
24096 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
24097 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
24098 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
24099 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
24100 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
24101 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
24102 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
24103 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
24104 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
24105 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
24106 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
24107 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
24108 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
24109 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
24110 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
24111 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
24112 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
24113 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
24114 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
24115 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
24116 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
24117 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
24118 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
24119 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
24120 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
24121 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
24122 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
24123 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
24124 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
24125 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
24126 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
24127 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
24128 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
24129 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
24130 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
24131 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
24132 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
24133 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
24134 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
24135 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
24136 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
24137 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
24138 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
24139 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
24140 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
24141 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
24142 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
24143 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
24144 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
24145 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
24146 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
24147 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
24148 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24149 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24150 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24151 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24152 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24153 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24155 #define IWMMXT_BUILTIN2(code, builtin) \
24156 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24158 #define IWMMXT2_BUILTIN2(code, builtin) \
24159 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24161 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24162 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24163 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24164 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24165 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24166 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24167 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24168 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24169 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24170 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24172 #define CRC32_BUILTIN(L, U) \
24173 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24174 UNKNOWN, 0},
24175 CRC32_BUILTIN (crc32b, CRC32B)
24176 CRC32_BUILTIN (crc32h, CRC32H)
24177 CRC32_BUILTIN (crc32w, CRC32W)
24178 CRC32_BUILTIN (crc32cb, CRC32CB)
24179 CRC32_BUILTIN (crc32ch, CRC32CH)
24180 CRC32_BUILTIN (crc32cw, CRC32CW)
24181 #undef CRC32_BUILTIN
24184 #define CRYPTO_BUILTIN(L, U) \
24185 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24186 UNKNOWN, 0},
24187 #undef CRYPTO1
24188 #undef CRYPTO2
24189 #undef CRYPTO3
24190 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24191 #define CRYPTO1(L, U, R, A)
24192 #define CRYPTO3(L, U, R, A1, A2, A3)
24193 #include "crypto.def"
24194 #undef CRYPTO1
24195 #undef CRYPTO2
24196 #undef CRYPTO3
24200 static const struct builtin_description bdesc_1arg[] =
24202 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24203 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24204 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24205 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24206 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24207 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24208 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24209 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24210 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24211 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24212 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24213 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24214 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24215 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24216 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24217 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24218 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24219 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24220 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24221 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24222 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24223 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24224 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24225 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24227 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24228 #define CRYPTO2(L, U, R, A1, A2)
24229 #define CRYPTO3(L, U, R, A1, A2, A3)
24230 #include "crypto.def"
24231 #undef CRYPTO1
24232 #undef CRYPTO2
24233 #undef CRYPTO3
24236 static const struct builtin_description bdesc_3arg[] =
24238 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24239 #define CRYPTO1(L, U, R, A)
24240 #define CRYPTO2(L, U, R, A1, A2)
24241 #include "crypto.def"
24242 #undef CRYPTO1
24243 #undef CRYPTO2
24244 #undef CRYPTO3
24246 #undef CRYPTO_BUILTIN
24248 /* Set up all the iWMMXt builtins. This is not called if
24249 TARGET_IWMMXT is zero. */
24251 static void
24252 arm_init_iwmmxt_builtins (void)
24254 const struct builtin_description * d;
24255 size_t i;
24257 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24258 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24259 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24261 tree v8qi_ftype_v8qi_v8qi_int
24262 = build_function_type_list (V8QI_type_node,
24263 V8QI_type_node, V8QI_type_node,
24264 integer_type_node, NULL_TREE);
24265 tree v4hi_ftype_v4hi_int
24266 = build_function_type_list (V4HI_type_node,
24267 V4HI_type_node, integer_type_node, NULL_TREE);
24268 tree v2si_ftype_v2si_int
24269 = build_function_type_list (V2SI_type_node,
24270 V2SI_type_node, integer_type_node, NULL_TREE);
24271 tree v2si_ftype_di_di
24272 = build_function_type_list (V2SI_type_node,
24273 long_long_integer_type_node,
24274 long_long_integer_type_node,
24275 NULL_TREE);
24276 tree di_ftype_di_int
24277 = build_function_type_list (long_long_integer_type_node,
24278 long_long_integer_type_node,
24279 integer_type_node, NULL_TREE);
24280 tree di_ftype_di_int_int
24281 = build_function_type_list (long_long_integer_type_node,
24282 long_long_integer_type_node,
24283 integer_type_node,
24284 integer_type_node, NULL_TREE);
24285 tree int_ftype_v8qi
24286 = build_function_type_list (integer_type_node,
24287 V8QI_type_node, NULL_TREE);
24288 tree int_ftype_v4hi
24289 = build_function_type_list (integer_type_node,
24290 V4HI_type_node, NULL_TREE);
24291 tree int_ftype_v2si
24292 = build_function_type_list (integer_type_node,
24293 V2SI_type_node, NULL_TREE);
24294 tree int_ftype_v8qi_int
24295 = build_function_type_list (integer_type_node,
24296 V8QI_type_node, integer_type_node, NULL_TREE);
24297 tree int_ftype_v4hi_int
24298 = build_function_type_list (integer_type_node,
24299 V4HI_type_node, integer_type_node, NULL_TREE);
24300 tree int_ftype_v2si_int
24301 = build_function_type_list (integer_type_node,
24302 V2SI_type_node, integer_type_node, NULL_TREE);
24303 tree v8qi_ftype_v8qi_int_int
24304 = build_function_type_list (V8QI_type_node,
24305 V8QI_type_node, integer_type_node,
24306 integer_type_node, NULL_TREE);
24307 tree v4hi_ftype_v4hi_int_int
24308 = build_function_type_list (V4HI_type_node,
24309 V4HI_type_node, integer_type_node,
24310 integer_type_node, NULL_TREE);
24311 tree v2si_ftype_v2si_int_int
24312 = build_function_type_list (V2SI_type_node,
24313 V2SI_type_node, integer_type_node,
24314 integer_type_node, NULL_TREE);
24315 /* Miscellaneous. */
24316 tree v8qi_ftype_v4hi_v4hi
24317 = build_function_type_list (V8QI_type_node,
24318 V4HI_type_node, V4HI_type_node, NULL_TREE);
24319 tree v4hi_ftype_v2si_v2si
24320 = build_function_type_list (V4HI_type_node,
24321 V2SI_type_node, V2SI_type_node, NULL_TREE);
24322 tree v8qi_ftype_v4hi_v8qi
24323 = build_function_type_list (V8QI_type_node,
24324 V4HI_type_node, V8QI_type_node, NULL_TREE);
24325 tree v2si_ftype_v4hi_v4hi
24326 = build_function_type_list (V2SI_type_node,
24327 V4HI_type_node, V4HI_type_node, NULL_TREE);
24328 tree v2si_ftype_v8qi_v8qi
24329 = build_function_type_list (V2SI_type_node,
24330 V8QI_type_node, V8QI_type_node, NULL_TREE);
24331 tree v4hi_ftype_v4hi_di
24332 = build_function_type_list (V4HI_type_node,
24333 V4HI_type_node, long_long_integer_type_node,
24334 NULL_TREE);
24335 tree v2si_ftype_v2si_di
24336 = build_function_type_list (V2SI_type_node,
24337 V2SI_type_node, long_long_integer_type_node,
24338 NULL_TREE);
24339 tree di_ftype_void
24340 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24341 tree int_ftype_void
24342 = build_function_type_list (integer_type_node, NULL_TREE);
24343 tree di_ftype_v8qi
24344 = build_function_type_list (long_long_integer_type_node,
24345 V8QI_type_node, NULL_TREE);
24346 tree di_ftype_v4hi
24347 = build_function_type_list (long_long_integer_type_node,
24348 V4HI_type_node, NULL_TREE);
24349 tree di_ftype_v2si
24350 = build_function_type_list (long_long_integer_type_node,
24351 V2SI_type_node, NULL_TREE);
24352 tree v2si_ftype_v4hi
24353 = build_function_type_list (V2SI_type_node,
24354 V4HI_type_node, NULL_TREE);
24355 tree v4hi_ftype_v8qi
24356 = build_function_type_list (V4HI_type_node,
24357 V8QI_type_node, NULL_TREE);
24358 tree v8qi_ftype_v8qi
24359 = build_function_type_list (V8QI_type_node,
24360 V8QI_type_node, NULL_TREE);
24361 tree v4hi_ftype_v4hi
24362 = build_function_type_list (V4HI_type_node,
24363 V4HI_type_node, NULL_TREE);
24364 tree v2si_ftype_v2si
24365 = build_function_type_list (V2SI_type_node,
24366 V2SI_type_node, NULL_TREE);
24368 tree di_ftype_di_v4hi_v4hi
24369 = build_function_type_list (long_long_unsigned_type_node,
24370 long_long_unsigned_type_node,
24371 V4HI_type_node, V4HI_type_node,
24372 NULL_TREE);
24374 tree di_ftype_v4hi_v4hi
24375 = build_function_type_list (long_long_unsigned_type_node,
24376                                 V4HI_type_node, V4HI_type_node,
24377 NULL_TREE);
24379 tree v2si_ftype_v2si_v4hi_v4hi
24380 = build_function_type_list (V2SI_type_node,
24381 V2SI_type_node, V4HI_type_node,
24382 V4HI_type_node, NULL_TREE);
24384 tree v2si_ftype_v2si_v8qi_v8qi
24385 = build_function_type_list (V2SI_type_node,
24386 V2SI_type_node, V8QI_type_node,
24387 V8QI_type_node, NULL_TREE);
24389 tree di_ftype_di_v2si_v2si
24390 = build_function_type_list (long_long_unsigned_type_node,
24391 long_long_unsigned_type_node,
24392 V2SI_type_node, V2SI_type_node,
24393 NULL_TREE);
24395 tree di_ftype_di_di_int
24396 = build_function_type_list (long_long_unsigned_type_node,
24397 long_long_unsigned_type_node,
24398 long_long_unsigned_type_node,
24399 integer_type_node, NULL_TREE);
24401 tree void_ftype_int
24402 = build_function_type_list (void_type_node,
24403 integer_type_node, NULL_TREE);
24405 tree v8qi_ftype_char
24406 = build_function_type_list (V8QI_type_node,
24407 signed_char_type_node, NULL_TREE);
24409 tree v4hi_ftype_short
24410 = build_function_type_list (V4HI_type_node,
24411 short_integer_type_node, NULL_TREE);
24413 tree v2si_ftype_int
24414 = build_function_type_list (V2SI_type_node,
24415 integer_type_node, NULL_TREE);
24417 /* Normal vector binops. */
24418 tree v8qi_ftype_v8qi_v8qi
24419 = build_function_type_list (V8QI_type_node,
24420 V8QI_type_node, V8QI_type_node, NULL_TREE);
24421 tree v4hi_ftype_v4hi_v4hi
24422 = build_function_type_list (V4HI_type_node,
24423                                 V4HI_type_node, V4HI_type_node, NULL_TREE);
24424 tree v2si_ftype_v2si_v2si
24425 = build_function_type_list (V2SI_type_node,
24426 V2SI_type_node, V2SI_type_node, NULL_TREE);
24427 tree di_ftype_di_di
24428 = build_function_type_list (long_long_unsigned_type_node,
24429 long_long_unsigned_type_node,
24430 long_long_unsigned_type_node,
24431 NULL_TREE);
24433 /* Add all builtins that are more or less simple operations on two
24434 operands. */
24435 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24437 /* Use one of the operands; the target can have a different mode for
24438 mask-generating compares. */
24439 enum machine_mode mode;
24440 tree type;
24442 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24443 continue;
24445 mode = insn_data[d->icode].operand[1].mode;
24447 switch (mode)
24449 case V8QImode:
24450 type = v8qi_ftype_v8qi_v8qi;
24451 break;
24452 case V4HImode:
24453 type = v4hi_ftype_v4hi_v4hi;
24454 break;
24455 case V2SImode:
24456 type = v2si_ftype_v2si_v2si;
24457 break;
24458 case DImode:
24459 type = di_ftype_di_di;
24460 break;
24462 default:
24463 gcc_unreachable ();
24466 def_mbuiltin (d->mask, d->name, type, d->code);
24469   /* Add the remaining iWMMXt insns with somewhat more complicated types.  */
24470 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24471 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24472 ARM_BUILTIN_ ## CODE)
24474 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24475 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24476 ARM_BUILTIN_ ## CODE)
24478 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24479 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24480 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24481 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24482 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24483 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24484 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24485 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24486 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24488 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24489 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24490 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24491 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24492 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24493 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24495 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24496 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24497 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24498 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24499 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24500 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24502 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24503 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24504 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24505 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24506 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24507 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24509 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24510 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24511 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24512 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24513 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24514 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24516 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24518 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24519 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24520 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24521 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24522 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24523 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24524 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24525 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24526 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24527 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24529 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24530 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24531 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24532 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24533 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24534 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24535 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24536 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24537 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24539 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24540 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24541 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24543 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24544 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24545 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24547 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24548 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24550 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24551 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24552 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24553 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24554 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24555 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24557 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24558 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24559 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24560 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24561 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24562 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24563 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24564 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24565 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24566 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24567 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24568 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24570 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24571 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24572 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24573 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24575 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24576 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24577 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24578 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24579 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24580 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24581 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24583 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24584 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24585 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24587 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24588 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24589 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24590 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24592 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24593 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24594 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24595 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24597 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24598 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24599 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24600 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24602 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24603 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24604 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24605 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24607 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24608 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24609 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24610 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24612 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24613 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24614 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24615 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24617 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24619 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24620 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24621 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24623 #undef iwmmx_mbuiltin
24624 #undef iwmmx2_mbuiltin
24627 static void
24628 arm_init_fp16_builtins (void)
24630 tree fp16_type = make_node (REAL_TYPE);
24631 TYPE_PRECISION (fp16_type) = 16;
24632 layout_type (fp16_type);
24633 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
24636 static void
24637 arm_init_crc32_builtins ()
24639 tree si_ftype_si_qi
24640 = build_function_type_list (unsigned_intSI_type_node,
24641 unsigned_intSI_type_node,
24642 unsigned_intQI_type_node, NULL_TREE);
24643 tree si_ftype_si_hi
24644 = build_function_type_list (unsigned_intSI_type_node,
24645 unsigned_intSI_type_node,
24646 unsigned_intHI_type_node, NULL_TREE);
24647 tree si_ftype_si_si
24648 = build_function_type_list (unsigned_intSI_type_node,
24649 unsigned_intSI_type_node,
24650 unsigned_intSI_type_node, NULL_TREE);
24652 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24653 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24654 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24655 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24656 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24657 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24658 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24659 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24660 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24661 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24662 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24663 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24664 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24665 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24666 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24667 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24668 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24669 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
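/* Illustrative use of the CRC32 builtins registered above (a sketch,
   assuming a target with the CRC extension enabled, e.g. -march=armv8-a+crc;
   the variable names are hypothetical):

     unsigned int next  = __builtin_arm_crc32w (crc, word);
     unsigned int nextc = __builtin_arm_crc32cb (crc, byte);

   each takes an unsigned 32-bit accumulator plus a data value and returns an
   unsigned 32-bit result, matching the function types built above.  */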
24672 static void
24673 arm_init_builtins (void)
24675 if (TARGET_REALLY_IWMMXT)
24676 arm_init_iwmmxt_builtins ();
24678 if (TARGET_NEON)
24679 arm_init_neon_builtins ();
24681 if (arm_fp16_format)
24682 arm_init_fp16_builtins ();
24684 if (TARGET_CRC32)
24685 arm_init_crc32_builtins ();
24688 /* Return the ARM builtin for CODE. */
24690 static tree
24691 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24693 if (code >= ARM_BUILTIN_MAX)
24694 return error_mark_node;
24696 return arm_builtin_decls[code];
24699 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24701 static const char *
24702 arm_invalid_parameter_type (const_tree t)
24704 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24705 return N_("function parameters cannot have __fp16 type");
24706 return NULL;
24709 /* Implement TARGET_INVALID_RETURN_TYPE.  */
24711 static const char *
24712 arm_invalid_return_type (const_tree t)
24714 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24715 return N_("functions cannot return __fp16 type");
24716 return NULL;
24719 /* Implement TARGET_PROMOTED_TYPE. */
24721 static tree
24722 arm_promoted_type (const_tree t)
24724 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24725 return float_type_node;
24726 return NULL_TREE;
24729 /* Implement TARGET_CONVERT_TO_TYPE.
24730 Specifically, this hook implements the peculiarity of the ARM
24731 half-precision floating-point C semantics that requires conversions between
24732 __fp16 to or from double to do an intermediate conversion to float. */
24734 static tree
24735 arm_convert_to_type (tree type, tree expr)
24737 tree fromtype = TREE_TYPE (expr);
24738 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24739 return NULL_TREE;
24740 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24741 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24742 return convert (type, convert (float_type_node, expr));
24743 return NULL_TREE;
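/* Illustrative effect of the hook above on user code (a sketch, assuming the
   usual ACLE __fp16 semantics):

     __fp16 h;
     double d = h;     // performed as (double) (float) h
     h = 1.0;          // double constant converted as via (float) 1.0

   i.e. any __fp16 <-> double conversion is routed through float, while
   __fp16 <-> float conversions are left to the normal conversion paths.  */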
24746 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24747 This simply adds HFmode as a supported mode; even though we don't
24748 implement arithmetic on this type directly, it's supported by
24749 optabs conversions, much the way the double-word arithmetic is
24750 special-cased in the default hook. */
24752 static bool
24753 arm_scalar_mode_supported_p (enum machine_mode mode)
24755 if (mode == HFmode)
24756 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24757 else if (ALL_FIXED_POINT_MODE_P (mode))
24758 return true;
24759 else
24760 return default_scalar_mode_supported_p (mode);
24763 /* Errors in the source file can cause expand_expr to return const0_rtx
24764 where we expect a vector. To avoid crashing, use one of the vector
24765 clear instructions. */
24767 static rtx
24768 safe_vector_operand (rtx x, enum machine_mode mode)
24770 if (x != const0_rtx)
24771 return x;
24772 x = gen_reg_rtx (mode);
24774 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24775 : gen_rtx_SUBREG (DImode, x, 0)));
24776 return x;
24779 /* Function to expand ternary builtins. */
24780 static rtx
24781 arm_expand_ternop_builtin (enum insn_code icode,
24782 tree exp, rtx target)
24784 rtx pat;
24785 tree arg0 = CALL_EXPR_ARG (exp, 0);
24786 tree arg1 = CALL_EXPR_ARG (exp, 1);
24787 tree arg2 = CALL_EXPR_ARG (exp, 2);
24789 rtx op0 = expand_normal (arg0);
24790 rtx op1 = expand_normal (arg1);
24791 rtx op2 = expand_normal (arg2);
24792 rtx op3 = NULL_RTX;
24794 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24795 lane operand depending on endianness. */
24796 bool builtin_sha1cpm_p = false;
24798 if (insn_data[icode].n_operands == 5)
24800 gcc_assert (icode == CODE_FOR_crypto_sha1c
24801 || icode == CODE_FOR_crypto_sha1p
24802 || icode == CODE_FOR_crypto_sha1m);
24803 builtin_sha1cpm_p = true;
24805 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24806 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24807 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24808 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
24811 if (VECTOR_MODE_P (mode0))
24812 op0 = safe_vector_operand (op0, mode0);
24813 if (VECTOR_MODE_P (mode1))
24814 op1 = safe_vector_operand (op1, mode1);
24815 if (VECTOR_MODE_P (mode2))
24816 op2 = safe_vector_operand (op2, mode2);
24818 if (! target
24819 || GET_MODE (target) != tmode
24820 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24821 target = gen_reg_rtx (tmode);
24823 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24824 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
24825 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
24827 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24828 op0 = copy_to_mode_reg (mode0, op0);
24829 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24830 op1 = copy_to_mode_reg (mode1, op1);
24831 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24832 op2 = copy_to_mode_reg (mode2, op2);
24833 if (builtin_sha1cpm_p)
24834 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
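  /* Illustrative note: the extra lane operand just set is what the
     crypto_sha1c/sha1p/sha1m patterns use to pick the scalar lane, so the
     expansion below passes const1_rtx on big-endian targets and const0_rtx
     on little-endian ones.  */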
24836 if (builtin_sha1cpm_p)
24837 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
24838 else
24839 pat = GEN_FCN (icode) (target, op0, op1, op2);
24840 if (! pat)
24841 return 0;
24842 emit_insn (pat);
24843 return target;
24846 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24848 static rtx
24849 arm_expand_binop_builtin (enum insn_code icode,
24850 tree exp, rtx target)
24852 rtx pat;
24853 tree arg0 = CALL_EXPR_ARG (exp, 0);
24854 tree arg1 = CALL_EXPR_ARG (exp, 1);
24855 rtx op0 = expand_normal (arg0);
24856 rtx op1 = expand_normal (arg1);
24857 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24858 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24859 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24861 if (VECTOR_MODE_P (mode0))
24862 op0 = safe_vector_operand (op0, mode0);
24863 if (VECTOR_MODE_P (mode1))
24864 op1 = safe_vector_operand (op1, mode1);
24866 if (! target
24867 || GET_MODE (target) != tmode
24868 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24869 target = gen_reg_rtx (tmode);
24871 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24872 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24874 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24875 op0 = copy_to_mode_reg (mode0, op0);
24876 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24877 op1 = copy_to_mode_reg (mode1, op1);
24879 pat = GEN_FCN (icode) (target, op0, op1);
24880 if (! pat)
24881 return 0;
24882 emit_insn (pat);
24883 return target;
24886 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24888 static rtx
24889 arm_expand_unop_builtin (enum insn_code icode,
24890 tree exp, rtx target, int do_load)
24892 rtx pat;
24893 tree arg0 = CALL_EXPR_ARG (exp, 0);
24894 rtx op0 = expand_normal (arg0);
24895 rtx op1 = NULL_RTX;
24896 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24897 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24898 bool builtin_sha1h_p = false;
24900 if (insn_data[icode].n_operands == 3)
24902 gcc_assert (icode == CODE_FOR_crypto_sha1h);
24903 builtin_sha1h_p = true;
24906 if (! target
24907 || GET_MODE (target) != tmode
24908 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24909 target = gen_reg_rtx (tmode);
24910 if (do_load)
24911 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
24912 else
24914 if (VECTOR_MODE_P (mode0))
24915 op0 = safe_vector_operand (op0, mode0);
24917 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24918 op0 = copy_to_mode_reg (mode0, op0);
24920 if (builtin_sha1h_p)
24921 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24923 if (builtin_sha1h_p)
24924 pat = GEN_FCN (icode) (target, op0, op1);
24925 else
24926 pat = GEN_FCN (icode) (target, op0);
24927 if (! pat)
24928 return 0;
24929 emit_insn (pat);
24930 return target;
24933 typedef enum {
24934 NEON_ARG_COPY_TO_REG,
24935 NEON_ARG_CONSTANT,
24936 NEON_ARG_MEMORY,
24937 NEON_ARG_STOP
24938 } builtin_arg;
24940 #define NEON_MAX_BUILTIN_ARGS 5
24942 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
24943 and return an expression for the accessed memory.
24945 The intrinsic function operates on a block of registers that has
24946 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
24947 function references the memory at EXP of type TYPE and in mode
24948 MEM_MODE; this mode may be BLKmode if no more suitable mode is
24949 available. */
24951 static tree
24952 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
24953 enum machine_mode reg_mode,
24954 neon_builtin_type_mode type_mode)
24956 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
24957 tree elem_type, upper_bound, array_type;
24959 /* Work out the size of the register block in bytes. */
24960 reg_size = GET_MODE_SIZE (reg_mode);
24962 /* Work out the size of each vector in bytes. */
24963 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
24964 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
24966 /* Work out how many vectors there are. */
24967 gcc_assert (reg_size % vector_size == 0);
24968 nvectors = reg_size / vector_size;
24970 /* Work out the type of each element. */
24971 gcc_assert (POINTER_TYPE_P (type));
24972 elem_type = TREE_TYPE (type);
24974 /* Work out how many elements are being loaded or stored.
24975 MEM_MODE == REG_MODE implies a one-to-one mapping between register
24976 and memory elements; anything else implies a lane load or store. */
24977 if (mem_mode == reg_mode)
24978 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
24979 else
24980 nelems = nvectors;
24982 /* Create a type that describes the full access. */
24983 upper_bound = build_int_cst (size_type_node, nelems - 1);
24984 array_type = build_array_type (elem_type, build_index_type (upper_bound));
24986 /* Dereference EXP using that type. */
24987 return fold_build2 (MEM_REF, array_type, exp,
24988 build_int_cst (build_pointer_type (array_type), 0));
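/* Worked example (illustrative): for a full structure load such as vld2q_f32,
   REG_MODE is OImode (32 bytes) and TYPE_MODE is T_V4SF, so vector_size is 16
   and nvectors is 2; since MEM_MODE == REG_MODE, nelems is 32 / 4 == 8 and the
   access is folded as a MEM_REF of type float[8].  For a lane load or store
   the memory mode differs from the register mode, so only nvectors elements
   (one per vector) are accessed.  */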
24991 /* Worker for arm_expand_neon_builtin: expand the arguments described by
      the NEON_ARG_* codes in the variable argument list and emit the insn.  */
24992 static rtx
24993 arm_expand_neon_args (rtx target, int icode, int have_retval,
24994 neon_builtin_type_mode type_mode,
24995 tree exp, int fcode, ...)
24997 va_list ap;
24998 rtx pat;
24999 tree arg[NEON_MAX_BUILTIN_ARGS];
25000 rtx op[NEON_MAX_BUILTIN_ARGS];
25001 tree arg_type;
25002 tree formals;
25003 enum machine_mode tmode = insn_data[icode].operand[0].mode;
25004 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
25005 enum machine_mode other_mode;
25006 int argc = 0;
25007 int opno;
25009 if (have_retval
25010 && (!target
25011 || GET_MODE (target) != tmode
25012 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
25013 target = gen_reg_rtx (tmode);
25015 va_start (ap, fcode);
25017 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
25019 for (;;)
25021 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
25023 if (thisarg == NEON_ARG_STOP)
25024 break;
25025 else
25027 opno = argc + have_retval;
25028 mode[argc] = insn_data[icode].operand[opno].mode;
25029 arg[argc] = CALL_EXPR_ARG (exp, argc);
25030 arg_type = TREE_VALUE (formals);
25031 if (thisarg == NEON_ARG_MEMORY)
25033 other_mode = insn_data[icode].operand[1 - opno].mode;
25034 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
25035 mode[argc], other_mode,
25036 type_mode);
25039           /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
25040              is returned.  */
25041 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
25042 (thisarg == NEON_ARG_MEMORY
25043 ? EXPAND_MEMORY : EXPAND_NORMAL));
25045 switch (thisarg)
25047 case NEON_ARG_COPY_TO_REG:
25048 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
25049 if (!(*insn_data[icode].operand[opno].predicate)
25050 (op[argc], mode[argc]))
25051 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
25052 break;
25054 case NEON_ARG_CONSTANT:
25055 /* FIXME: This error message is somewhat unhelpful. */
25056 if (!(*insn_data[icode].operand[opno].predicate)
25057 (op[argc], mode[argc]))
25058 error ("argument must be a constant");
25059 break;
25061 case NEON_ARG_MEMORY:
25062 /* Check if expand failed. */
25063 if (op[argc] == const0_rtx)
25064 return 0;
25065 gcc_assert (MEM_P (op[argc]));
25066 PUT_MODE (op[argc], mode[argc]);
25067 /* ??? arm_neon.h uses the same built-in functions for signed
25068 and unsigned accesses, casting where necessary. This isn't
25069 alias safe. */
25070 set_mem_alias_set (op[argc], 0);
25071 if (!(*insn_data[icode].operand[opno].predicate)
25072 (op[argc], mode[argc]))
25073 op[argc] = (replace_equiv_address
25074 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
25075 break;
25077 case NEON_ARG_STOP:
25078 gcc_unreachable ();
25081 argc++;
25082 formals = TREE_CHAIN (formals);
25086 va_end (ap);
25088 if (have_retval)
25089 switch (argc)
25091 case 1:
25092 pat = GEN_FCN (icode) (target, op[0]);
25093 break;
25095 case 2:
25096 pat = GEN_FCN (icode) (target, op[0], op[1]);
25097 break;
25099 case 3:
25100 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
25101 break;
25103 case 4:
25104 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
25105 break;
25107 case 5:
25108 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
25109 break;
25111 default:
25112 gcc_unreachable ();
25114 else
25115 switch (argc)
25117 case 1:
25118 pat = GEN_FCN (icode) (op[0]);
25119 break;
25121 case 2:
25122 pat = GEN_FCN (icode) (op[0], op[1]);
25123 break;
25125 case 3:
25126 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
25127 break;
25129 case 4:
25130 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
25131 break;
25133 case 5:
25134 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
25135 break;
25137 default:
25138 gcc_unreachable ();
25141 if (!pat)
25142 return 0;
25144 emit_insn (pat);
25146 return target;
25149 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25150 constants defined per-instruction or per instruction-variant. Instead, the
25151 required info is looked up in the table neon_builtin_data. */
25152 static rtx
25153 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25155 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25156 neon_itype itype = d->itype;
25157 enum insn_code icode = d->code;
25158 neon_builtin_type_mode type_mode = d->mode;
25160 switch (itype)
25162 case NEON_UNOP:
25163 case NEON_CONVERT:
25164 case NEON_DUPLANE:
25165 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25166 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25168 case NEON_BINOP:
25169 case NEON_SETLANE:
25170 case NEON_SCALARMUL:
25171 case NEON_SCALARMULL:
25172 case NEON_SCALARMULH:
25173 case NEON_SHIFTINSERT:
25174 case NEON_LOGICBINOP:
25175 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25176 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25177 NEON_ARG_STOP);
25179 case NEON_TERNOP:
25180 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25181 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25182 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25184 case NEON_GETLANE:
25185 case NEON_FIXCONV:
25186 case NEON_SHIFTIMM:
25187 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25188 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25189 NEON_ARG_STOP);
25191 case NEON_CREATE:
25192 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25193 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25195 case NEON_DUP:
25196 case NEON_RINT:
25197 case NEON_SPLIT:
25198 case NEON_FLOAT_WIDEN:
25199 case NEON_FLOAT_NARROW:
25200 case NEON_REINTERP:
25201 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25202 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25204 case NEON_COMBINE:
25205 case NEON_VTBL:
25206 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25207 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25209 case NEON_LANEMUL:
25210 case NEON_LANEMULL:
25211 case NEON_LANEMULH:
25212 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25213 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25214 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25216 case NEON_LANEMAC:
25217 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25218 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25219 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25221 case NEON_SHIFTACC:
25222 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25223 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25224 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25226 case NEON_SCALARMAC:
25227 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25228 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25229 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25231 case NEON_SELECT:
25232 case NEON_VTBX:
25233 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25234 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25235 NEON_ARG_STOP);
25237 case NEON_LOAD1:
25238 case NEON_LOADSTRUCT:
25239 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25240 NEON_ARG_MEMORY, NEON_ARG_STOP);
25242 case NEON_LOAD1LANE:
25243 case NEON_LOADSTRUCTLANE:
25244 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25245 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25246 NEON_ARG_STOP);
25248 case NEON_STORE1:
25249 case NEON_STORESTRUCT:
25250 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25251 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25253 case NEON_STORE1LANE:
25254 case NEON_STORESTRUCTLANE:
25255 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25256 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25257 NEON_ARG_STOP);
25260 gcc_unreachable ();
25263 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25264 void
25265 neon_reinterpret (rtx dest, rtx src)
25267 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25270 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25271 not to early-clobber SRC registers in the process.
25273 We assume that the operands described by SRC and DEST represent a
25274 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25275 number of components into which the copy has been decomposed. */
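/* A worked example: copying {d1, d2} from {d0, d1} with COUNT == 2.  The
   source and destination overlap and REGNO (operands[0]) is greater than
   REGNO (operands[1]), so the moves are emitted in reverse order:
   d2 <- d1 first, then d1 <- d0.  Emitting them in forward order would
   overwrite d1 before it had been read.  The register names are only
   illustrative.  */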
25276 void
25277 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25279 unsigned int i;
25281 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25282 || REGNO (operands[0]) < REGNO (operands[1]))
25284 for (i = 0; i < count; i++)
25286 operands[2 * i] = dest[i];
25287 operands[2 * i + 1] = src[i];
25290 else
25292 for (i = 0; i < count; i++)
25294 operands[2 * i] = dest[count - i - 1];
25295 operands[2 * i + 1] = src[count - i - 1];
25300 /* Split operands into moves from op[1] + op[2] into op[0]. */
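/* operands[0] is the wide destination; operands[1] and operands[2] are
   its low and high halves.  Three cases are handled below: the halves are
   already in place (emit only a deleted-insn note), they are exactly
   swapped (emit a single two-set parallel, matched by a VSWP pattern), or
   one or two ordinary moves are emitted, ordered so that the second
   source is not clobbered by the first move.  */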
25302 void
25303 neon_split_vcombine (rtx operands[3])
25305 unsigned int dest = REGNO (operands[0]);
25306 unsigned int src1 = REGNO (operands[1]);
25307 unsigned int src2 = REGNO (operands[2]);
25308 enum machine_mode halfmode = GET_MODE (operands[1]);
25309 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25310 rtx destlo, desthi;
25312 if (src1 == dest && src2 == dest + halfregs)
25314 /* No-op move. Can't split to nothing; emit something. */
25315 emit_note (NOTE_INSN_DELETED);
25316 return;
25319 /* Preserve register attributes for variable tracking. */
25320 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25321 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25322 GET_MODE_SIZE (halfmode));
25324 /* Special case of reversed high/low parts. Use VSWP. */
25325 if (src2 == dest && src1 == dest + halfregs)
25327 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25328 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25329 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25330 return;
25333 if (!reg_overlap_mentioned_p (operands[2], destlo))
25335 /* Try to avoid unnecessary moves if part of the result
25336 is in the right place already. */
25337 if (src1 != dest)
25338 emit_move_insn (destlo, operands[1]);
25339 if (src2 != dest + halfregs)
25340 emit_move_insn (desthi, operands[2]);
25342 else
25344 if (src2 != dest + halfregs)
25345 emit_move_insn (desthi, operands[2]);
25346 if (src1 != dest)
25347 emit_move_insn (destlo, operands[1]);
25351 /* Expand an expression EXP that calls a built-in function,
25352 with result going to TARGET if that's convenient
25353 (and in mode MODE if that's convenient).
25354 SUBTARGET may be used as the target for computing one of EXP's operands.
25355 IGNORE is nonzero if the value is to be ignored. */
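/* Dispatch order below: Neon builtins (FCODE >= ARM_BUILTIN_NEON_BASE)
   are handed straight to arm_expand_neon_builtin; the switch then covers
   the iWMMXt builtins that need special argument checking; anything left
   over is looked up in the generic bdesc_2arg, bdesc_1arg and bdesc_3arg
   tables.  */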
25357 static rtx
25358 arm_expand_builtin (tree exp,
25359 rtx target,
25360 rtx subtarget ATTRIBUTE_UNUSED,
25361 enum machine_mode mode ATTRIBUTE_UNUSED,
25362 int ignore ATTRIBUTE_UNUSED)
25364 const struct builtin_description * d;
25365 enum insn_code icode;
25366 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25367 tree arg0;
25368 tree arg1;
25369 tree arg2;
25370 rtx op0;
25371 rtx op1;
25372 rtx op2;
25373 rtx pat;
25374 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25375 size_t i;
25376 enum machine_mode tmode;
25377 enum machine_mode mode0;
25378 enum machine_mode mode1;
25379 enum machine_mode mode2;
25380 int opint;
25381 int selector;
25382 int mask;
25383 int imm;
25385 if (fcode >= ARM_BUILTIN_NEON_BASE)
25386 return arm_expand_neon_builtin (fcode, exp, target);
25388 switch (fcode)
25390 case ARM_BUILTIN_TEXTRMSB:
25391 case ARM_BUILTIN_TEXTRMUB:
25392 case ARM_BUILTIN_TEXTRMSH:
25393 case ARM_BUILTIN_TEXTRMUH:
25394 case ARM_BUILTIN_TEXTRMSW:
25395 case ARM_BUILTIN_TEXTRMUW:
25396 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25397 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25398 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25399 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25400 : CODE_FOR_iwmmxt_textrmw);
25402 arg0 = CALL_EXPR_ARG (exp, 0);
25403 arg1 = CALL_EXPR_ARG (exp, 1);
25404 op0 = expand_normal (arg0);
25405 op1 = expand_normal (arg1);
25406 tmode = insn_data[icode].operand[0].mode;
25407 mode0 = insn_data[icode].operand[1].mode;
25408 mode1 = insn_data[icode].operand[2].mode;
25410 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25411 op0 = copy_to_mode_reg (mode0, op0);
25412 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25414 /* @@@ better error message */
25415 error ("selector must be an immediate");
25416 return gen_reg_rtx (tmode);
25419 opint = INTVAL (op1);
25420 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25422 if (opint > 7 || opint < 0)
25423 error ("the range of selector should be in 0 to 7");
25425 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25427 if (opint > 3 || opint < 0)
25428 error ("the range of selector should be in 0 to 3");
25430 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25432 if (opint > 1 || opint < 0)
25433 error ("the range of selector should be in 0 to 1");
25436 if (target == 0
25437 || GET_MODE (target) != tmode
25438 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25439 target = gen_reg_rtx (tmode);
25440 pat = GEN_FCN (icode) (target, op0, op1);
25441 if (! pat)
25442 return 0;
25443 emit_insn (pat);
25444 return target;
25446 case ARM_BUILTIN_WALIGNI:
25447 /* If op2 is immediate, call waligni, else call walignr. */
25448 arg0 = CALL_EXPR_ARG (exp, 0);
25449 arg1 = CALL_EXPR_ARG (exp, 1);
25450 arg2 = CALL_EXPR_ARG (exp, 2);
25451 op0 = expand_normal (arg0);
25452 op1 = expand_normal (arg1);
25453 op2 = expand_normal (arg2);
25454 if (CONST_INT_P (op2))
25456 icode = CODE_FOR_iwmmxt_waligni;
25457 tmode = insn_data[icode].operand[0].mode;
25458 mode0 = insn_data[icode].operand[1].mode;
25459 mode1 = insn_data[icode].operand[2].mode;
25460 mode2 = insn_data[icode].operand[3].mode;
25461 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25462 op0 = copy_to_mode_reg (mode0, op0);
25463 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25464 op1 = copy_to_mode_reg (mode1, op1);
25465 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25466 selector = INTVAL (op2);
25467 if (selector > 7 || selector < 0)
25468 error ("the range of selector should be in 0 to 7");
25470 else
25472 icode = CODE_FOR_iwmmxt_walignr;
25473 tmode = insn_data[icode].operand[0].mode;
25474 mode0 = insn_data[icode].operand[1].mode;
25475 mode1 = insn_data[icode].operand[2].mode;
25476 mode2 = insn_data[icode].operand[3].mode;
25477 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25478 op0 = copy_to_mode_reg (mode0, op0);
25479 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25480 op1 = copy_to_mode_reg (mode1, op1);
25481 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25482 op2 = copy_to_mode_reg (mode2, op2);
25484 if (target == 0
25485 || GET_MODE (target) != tmode
25486 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25487 target = gen_reg_rtx (tmode);
25488 pat = GEN_FCN (icode) (target, op0, op1, op2);
25489 if (!pat)
25490 return 0;
25491 emit_insn (pat);
25492 return target;
25494 case ARM_BUILTIN_TINSRB:
25495 case ARM_BUILTIN_TINSRH:
25496 case ARM_BUILTIN_TINSRW:
25497 case ARM_BUILTIN_WMERGE:
25498 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25499 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25500 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25501 : CODE_FOR_iwmmxt_tinsrw);
25502 arg0 = CALL_EXPR_ARG (exp, 0);
25503 arg1 = CALL_EXPR_ARG (exp, 1);
25504 arg2 = CALL_EXPR_ARG (exp, 2);
25505 op0 = expand_normal (arg0);
25506 op1 = expand_normal (arg1);
25507 op2 = expand_normal (arg2);
25508 tmode = insn_data[icode].operand[0].mode;
25509 mode0 = insn_data[icode].operand[1].mode;
25510 mode1 = insn_data[icode].operand[2].mode;
25511 mode2 = insn_data[icode].operand[3].mode;
25513 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25514 op0 = copy_to_mode_reg (mode0, op0);
25515 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25516 op1 = copy_to_mode_reg (mode1, op1);
25517 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25519 error ("selector must be an immediate");
25520 return const0_rtx;
25522 if (icode == CODE_FOR_iwmmxt_wmerge)
25524 selector = INTVAL (op2);
25525 if (selector > 7 || selector < 0)
25526 error ("the range of selector should be in 0 to 7");
25528 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25529 || (icode == CODE_FOR_iwmmxt_tinsrh)
25530 || (icode == CODE_FOR_iwmmxt_tinsrw))
25532 mask = 0x01;
25533 selector= INTVAL (op2);
25534 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25535 error ("the range of selector should be in 0 to 7");
25536 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3))
25537 error ("the range of selector should be in 0 to 3");
25538 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1))
25539 error ("the range of selector should be in 0 to 1");
25540 mask <<= selector;
25541 op2 = GEN_INT (mask);
25543 if (target == 0
25544 || GET_MODE (target) != tmode
25545 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25546 target = gen_reg_rtx (tmode);
25547 pat = GEN_FCN (icode) (target, op0, op1, op2);
25548 if (! pat)
25549 return 0;
25550 emit_insn (pat);
25551 return target;
25553 case ARM_BUILTIN_SETWCGR0:
25554 case ARM_BUILTIN_SETWCGR1:
25555 case ARM_BUILTIN_SETWCGR2:
25556 case ARM_BUILTIN_SETWCGR3:
25557 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25558 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25559 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25560 : CODE_FOR_iwmmxt_setwcgr3);
25561 arg0 = CALL_EXPR_ARG (exp, 0);
25562 op0 = expand_normal (arg0);
25563 mode0 = insn_data[icode].operand[0].mode;
25564 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25565 op0 = copy_to_mode_reg (mode0, op0);
25566 pat = GEN_FCN (icode) (op0);
25567 if (!pat)
25568 return 0;
25569 emit_insn (pat);
25570 return 0;
25572 case ARM_BUILTIN_GETWCGR0:
25573 case ARM_BUILTIN_GETWCGR1:
25574 case ARM_BUILTIN_GETWCGR2:
25575 case ARM_BUILTIN_GETWCGR3:
25576 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25577 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25578 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25579 : CODE_FOR_iwmmxt_getwcgr3);
25580 tmode = insn_data[icode].operand[0].mode;
25581 if (target == 0
25582 || GET_MODE (target) != tmode
25583 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25584 target = gen_reg_rtx (tmode);
25585 pat = GEN_FCN (icode) (target);
25586 if (!pat)
25587 return 0;
25588 emit_insn (pat);
25589 return target;
25591 case ARM_BUILTIN_WSHUFH:
25592 icode = CODE_FOR_iwmmxt_wshufh;
25593 arg0 = CALL_EXPR_ARG (exp, 0);
25594 arg1 = CALL_EXPR_ARG (exp, 1);
25595 op0 = expand_normal (arg0);
25596 op1 = expand_normal (arg1);
25597 tmode = insn_data[icode].operand[0].mode;
25598 mode1 = insn_data[icode].operand[1].mode;
25599 mode2 = insn_data[icode].operand[2].mode;
25601 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25602 op0 = copy_to_mode_reg (mode1, op0);
25603 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25605 error ("mask must be an immediate");
25606 return const0_rtx;
25608 selector = INTVAL (op1);
25609 if (selector < 0 || selector > 255)
25610 error ("the range of mask should be in 0 to 255");
25611 if (target == 0
25612 || GET_MODE (target) != tmode
25613 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25614 target = gen_reg_rtx (tmode);
25615 pat = GEN_FCN (icode) (target, op0, op1);
25616 if (! pat)
25617 return 0;
25618 emit_insn (pat);
25619 return target;
25621 case ARM_BUILTIN_WMADDS:
25622 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25623 case ARM_BUILTIN_WMADDSX:
25624 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25625 case ARM_BUILTIN_WMADDSN:
25626 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25627 case ARM_BUILTIN_WMADDU:
25628 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25629 case ARM_BUILTIN_WMADDUX:
25630 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25631 case ARM_BUILTIN_WMADDUN:
25632 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25633 case ARM_BUILTIN_WSADBZ:
25634 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25635 case ARM_BUILTIN_WSADHZ:
25636 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25638 /* Several three-argument builtins. */
25639 case ARM_BUILTIN_WMACS:
25640 case ARM_BUILTIN_WMACU:
25641 case ARM_BUILTIN_TMIA:
25642 case ARM_BUILTIN_TMIAPH:
25643 case ARM_BUILTIN_TMIATT:
25644 case ARM_BUILTIN_TMIATB:
25645 case ARM_BUILTIN_TMIABT:
25646 case ARM_BUILTIN_TMIABB:
25647 case ARM_BUILTIN_WQMIABB:
25648 case ARM_BUILTIN_WQMIABT:
25649 case ARM_BUILTIN_WQMIATB:
25650 case ARM_BUILTIN_WQMIATT:
25651 case ARM_BUILTIN_WQMIABBN:
25652 case ARM_BUILTIN_WQMIABTN:
25653 case ARM_BUILTIN_WQMIATBN:
25654 case ARM_BUILTIN_WQMIATTN:
25655 case ARM_BUILTIN_WMIABB:
25656 case ARM_BUILTIN_WMIABT:
25657 case ARM_BUILTIN_WMIATB:
25658 case ARM_BUILTIN_WMIATT:
25659 case ARM_BUILTIN_WMIABBN:
25660 case ARM_BUILTIN_WMIABTN:
25661 case ARM_BUILTIN_WMIATBN:
25662 case ARM_BUILTIN_WMIATTN:
25663 case ARM_BUILTIN_WMIAWBB:
25664 case ARM_BUILTIN_WMIAWBT:
25665 case ARM_BUILTIN_WMIAWTB:
25666 case ARM_BUILTIN_WMIAWTT:
25667 case ARM_BUILTIN_WMIAWBBN:
25668 case ARM_BUILTIN_WMIAWBTN:
25669 case ARM_BUILTIN_WMIAWTBN:
25670 case ARM_BUILTIN_WMIAWTTN:
25671 case ARM_BUILTIN_WSADB:
25672 case ARM_BUILTIN_WSADH:
25673 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25674 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25675 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25676 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25677 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25678 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25679 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25680 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25681 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25682 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25683 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25684 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25685 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25686 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25687 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25688 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25689 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25690 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25691 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25692 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25693 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25694 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25695 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25696 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25697 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25698 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25699 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25700 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25701 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25702 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25703 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25704 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25705 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25706 : CODE_FOR_iwmmxt_wsadh);
25707 arg0 = CALL_EXPR_ARG (exp, 0);
25708 arg1 = CALL_EXPR_ARG (exp, 1);
25709 arg2 = CALL_EXPR_ARG (exp, 2);
25710 op0 = expand_normal (arg0);
25711 op1 = expand_normal (arg1);
25712 op2 = expand_normal (arg2);
25713 tmode = insn_data[icode].operand[0].mode;
25714 mode0 = insn_data[icode].operand[1].mode;
25715 mode1 = insn_data[icode].operand[2].mode;
25716 mode2 = insn_data[icode].operand[3].mode;
25718 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25719 op0 = copy_to_mode_reg (mode0, op0);
25720 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25721 op1 = copy_to_mode_reg (mode1, op1);
25722 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25723 op2 = copy_to_mode_reg (mode2, op2);
25724 if (target == 0
25725 || GET_MODE (target) != tmode
25726 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25727 target = gen_reg_rtx (tmode);
25728 pat = GEN_FCN (icode) (target, op0, op1, op2);
25729 if (! pat)
25730 return 0;
25731 emit_insn (pat);
25732 return target;
25734 case ARM_BUILTIN_WZERO:
25735 target = gen_reg_rtx (DImode);
25736 emit_insn (gen_iwmmxt_clrdi (target));
25737 return target;
25739 case ARM_BUILTIN_WSRLHI:
25740 case ARM_BUILTIN_WSRLWI:
25741 case ARM_BUILTIN_WSRLDI:
25742 case ARM_BUILTIN_WSLLHI:
25743 case ARM_BUILTIN_WSLLWI:
25744 case ARM_BUILTIN_WSLLDI:
25745 case ARM_BUILTIN_WSRAHI:
25746 case ARM_BUILTIN_WSRAWI:
25747 case ARM_BUILTIN_WSRADI:
25748 case ARM_BUILTIN_WRORHI:
25749 case ARM_BUILTIN_WRORWI:
25750 case ARM_BUILTIN_WRORDI:
25751 case ARM_BUILTIN_WSRLH:
25752 case ARM_BUILTIN_WSRLW:
25753 case ARM_BUILTIN_WSRLD:
25754 case ARM_BUILTIN_WSLLH:
25755 case ARM_BUILTIN_WSLLW:
25756 case ARM_BUILTIN_WSLLD:
25757 case ARM_BUILTIN_WSRAH:
25758 case ARM_BUILTIN_WSRAW:
25759 case ARM_BUILTIN_WSRAD:
25760 case ARM_BUILTIN_WRORH:
25761 case ARM_BUILTIN_WRORW:
25762 case ARM_BUILTIN_WRORD:
25763 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
25764 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
25765 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
25766 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
25767 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
25768 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
25769 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
25770 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
25771 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
25772 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
25773 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
25774 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
25775 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
25776 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
25777 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
25778 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
25779 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
25780 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25781 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25782 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25783 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25784 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25785 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25786 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25787 : CODE_FOR_nothing);
25788 arg1 = CALL_EXPR_ARG (exp, 1);
25789 op1 = expand_normal (arg1);
25790 if (GET_MODE (op1) == VOIDmode)
25792 imm = INTVAL (op1);
25793 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25794 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25795 && (imm < 0 || imm > 32))
25797 if (fcode == ARM_BUILTIN_WRORHI)
25798 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25799 else if (fcode == ARM_BUILTIN_WRORWI)
25800 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25801 else if (fcode == ARM_BUILTIN_WRORH)
25802 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25803 else
25804 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25806 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25807 && (imm < 0 || imm > 64))
25809 if (fcode == ARM_BUILTIN_WRORDI)
25810 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25811 else
25812 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25814 else if (imm < 0)
25816 if (fcode == ARM_BUILTIN_WSRLHI)
25817 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25818 else if (fcode == ARM_BUILTIN_WSRLWI)
25819 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25820 else if (fcode == ARM_BUILTIN_WSRLDI)
25821 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25822 else if (fcode == ARM_BUILTIN_WSLLHI)
25823 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25824 else if (fcode == ARM_BUILTIN_WSLLWI)
25825 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25826 else if (fcode == ARM_BUILTIN_WSLLDI)
25827 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25828 else if (fcode == ARM_BUILTIN_WSRAHI)
25829 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25830 else if (fcode == ARM_BUILTIN_WSRAWI)
25831 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25832 else if (fcode == ARM_BUILTIN_WSRADI)
25833 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25834 else if (fcode == ARM_BUILTIN_WSRLH)
25835 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25836 else if (fcode == ARM_BUILTIN_WSRLW)
25837 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25838 else if (fcode == ARM_BUILTIN_WSRLD)
25839 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25840 else if (fcode == ARM_BUILTIN_WSLLH)
25841 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25842 else if (fcode == ARM_BUILTIN_WSLLW)
25843 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25844 else if (fcode == ARM_BUILTIN_WSLLD)
25845 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25846 else if (fcode == ARM_BUILTIN_WSRAH)
25847 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25848 else if (fcode == ARM_BUILTIN_WSRAW)
25849 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25850 else
25851 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25854 return arm_expand_binop_builtin (icode, exp, target);
25856 default:
25857 break;
25860 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25861 if (d->code == (const enum arm_builtins) fcode)
25862 return arm_expand_binop_builtin (d->icode, exp, target);
25864 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
25865 if (d->code == (const enum arm_builtins) fcode)
25866 return arm_expand_unop_builtin (d->icode, exp, target, 0);
25868 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
25869 if (d->code == (const enum arm_builtins) fcode)
25870 return arm_expand_ternop_builtin (d->icode, exp, target);
25872 /* @@@ Should really do something sensible here. */
25873 return NULL_RTX;
25876 /* Return the number (counting from 0) of
25877 the least significant set bit in MASK. */
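/* For example, MASK == 0x18 (binary 11000) gives 3.  */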
25879 inline static int
25880 number_of_first_bit_set (unsigned mask)
25882 return ctz_hwi (mask);
25885 /* Like emit_multi_reg_push, but allowing for a different set of
25886 registers to be described as saved. MASK is the set of registers
25887 to be saved; REAL_REGS is the set of registers to be described as
25888 saved. If REAL_REGS is 0, only describe the stack adjustment. */
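/* For example, thumb1_expand_prologue below passes REAL_REGS == 0 when it
   pushes argument registers for anonymous arguments: the registers are
   stored, but the unwind information records only the stack pointer
   adjustment.  */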
25890 static rtx
25891 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25893 unsigned long regno;
25894 rtx par[10], tmp, reg, insn;
25895 int i, j;
25897 /* Build the parallel of the registers actually being stored. */
25898 for (i = 0; mask; ++i, mask &= mask - 1)
25900 regno = ctz_hwi (mask);
25901 reg = gen_rtx_REG (SImode, regno);
25903 if (i == 0)
25904 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25905 else
25906 tmp = gen_rtx_USE (VOIDmode, reg);
25908 par[i] = tmp;
25911 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25912 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25913 tmp = gen_frame_mem (BLKmode, tmp);
25914 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
25915 par[0] = tmp;
25917 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25918 insn = emit_insn (tmp);
25920 /* Always build the stack adjustment note for unwind info. */
25921 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25922 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
25923 par[0] = tmp;
25925 /* Build the parallel of the registers recorded as saved for unwind. */
25926 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25928 regno = ctz_hwi (real_regs);
25929 reg = gen_rtx_REG (SImode, regno);
25931 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25932 tmp = gen_frame_mem (SImode, tmp);
25933 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
25934 RTX_FRAME_RELATED_P (tmp) = 1;
25935 par[j + 1] = tmp;
25938 if (j == 0)
25939 tmp = par[0];
25940 else
25942 RTX_FRAME_RELATED_P (par[0]) = 1;
25943 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25946 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25948 return insn;
26951 /* Emit code to pop registers from the stack. F is the
26952 assembly file. MASK is the registers to pop. */
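/* A typical emitted line is
	pop	{r4, r5, r7}
   (register choice illustrative).  When the PC is in MASK but a direct
   "pop {..., pc}" is unsafe (interworking, backtrace or EH return),
   thumb_exit is used instead.  */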
25953 static void
25954 thumb_pop (FILE *f, unsigned long mask)
25956 int regno;
25957 int lo_mask = mask & 0xFF;
25958 int pushed_words = 0;
25960 gcc_assert (mask);
25962 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25964 /* Special case. Do not generate a POP PC statement here; do it in
25965 thumb_exit (). */
25966 thumb_exit (f, -1);
25967 return;
25970 fprintf (f, "\tpop\t{");
25972 /* Look at the low registers first. */
25973 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25975 if (lo_mask & 1)
25977 asm_fprintf (f, "%r", regno);
25979 if ((lo_mask & ~1) != 0)
25980 fprintf (f, ", ");
25982 pushed_words++;
25986 if (mask & (1 << PC_REGNUM))
25988 /* Catch popping the PC. */
25989 if (TARGET_INTERWORK || TARGET_BACKTRACE
25990 || crtl->calls_eh_return)
25992 /* The PC is never popped directly; instead
25993 it is popped into r3 and then BX is used. */
25994 fprintf (f, "}\n");
25996 thumb_exit (f, -1);
25998 return;
26000 else
26002 if (mask & 0xFF)
26003 fprintf (f, ", ");
26005 asm_fprintf (f, "%r", PC_REGNUM);
26009 fprintf (f, "}\n");
26012 /* Generate code to return from a thumb function.
26013 If 'reg_containing_return_addr' is -1, then the return address is
26014 actually on the stack, at the stack pointer. */
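/* The overall strategy: when popping straight into the PC is not safe,
   work out which argument registers may be clobbered (based on the size
   of the return value), pop the saved values into them, move each one to
   its proper register (FP, SP, LR), and finally return with BX.  */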
26015 static void
26016 thumb_exit (FILE *f, int reg_containing_return_addr)
26018 unsigned regs_available_for_popping;
26019 unsigned regs_to_pop;
26020 int pops_needed;
26021 unsigned available;
26022 unsigned required;
26023 enum machine_mode mode;
26024 int size;
26025 int restore_a4 = FALSE;
26027 /* Compute the registers we need to pop. */
26028 regs_to_pop = 0;
26029 pops_needed = 0;
26031 if (reg_containing_return_addr == -1)
26033 regs_to_pop |= 1 << LR_REGNUM;
26034 ++pops_needed;
26037 if (TARGET_BACKTRACE)
26039 /* Restore the (ARM) frame pointer and stack pointer. */
26040 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
26041 pops_needed += 2;
26044 /* If there is nothing to pop then just emit the BX instruction and
26045 return. */
26046 if (pops_needed == 0)
26048 if (crtl->calls_eh_return)
26049 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26051 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26052 return;
26054 /* Otherwise if we are not supporting interworking and we have not created
26055 a backtrace structure and the function was not entered in ARM mode then
26056 just pop the return address straight into the PC. */
26057 else if (!TARGET_INTERWORK
26058 && !TARGET_BACKTRACE
26059 && !is_called_in_ARM_mode (current_function_decl)
26060 && !crtl->calls_eh_return)
26062 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
26063 return;
26066 /* Find out how many of the (return) argument registers we can corrupt. */
26067 regs_available_for_popping = 0;
26069 /* If returning via __builtin_eh_return, the bottom three registers
26070 all contain information needed for the return. */
26071 if (crtl->calls_eh_return)
26072 size = 12;
26073 else
26075 /* We can deduce the registers used from the function's
26076 return value. This is more reliable than examining
26077 df_regs_ever_live_p () because that will be set if the register is
26078 ever used in the function, not just if the register is used
26079 to hold a return value. */
26081 if (crtl->return_rtx != 0)
26082 mode = GET_MODE (crtl->return_rtx);
26083 else
26084 mode = DECL_MODE (DECL_RESULT (current_function_decl));
26086 size = GET_MODE_SIZE (mode);
26088 if (size == 0)
26090 /* In a void function we can use any argument register.
26091 In a function that returns a structure on the stack
26092 we can use the second and third argument registers. */
26093 if (mode == VOIDmode)
26094 regs_available_for_popping =
26095 (1 << ARG_REGISTER (1))
26096 | (1 << ARG_REGISTER (2))
26097 | (1 << ARG_REGISTER (3));
26098 else
26099 regs_available_for_popping =
26100 (1 << ARG_REGISTER (2))
26101 | (1 << ARG_REGISTER (3));
26103 else if (size <= 4)
26104 regs_available_for_popping =
26105 (1 << ARG_REGISTER (2))
26106 | (1 << ARG_REGISTER (3));
26107 else if (size <= 8)
26108 regs_available_for_popping =
26109 (1 << ARG_REGISTER (3));
26112 /* Match registers to be popped with registers into which we pop them. */
26113 for (available = regs_available_for_popping,
26114 required = regs_to_pop;
26115 required != 0 && available != 0;
26116 available &= ~(available & - available),
26117 required &= ~(required & - required))
26118 -- pops_needed;
26120 /* If we have any popping registers left over, remove them. */
26121 if (available > 0)
26122 regs_available_for_popping &= ~available;
26124 /* Otherwise if we need another popping register we can use
26125 the fourth argument register. */
26126 else if (pops_needed)
26128 /* If we have not found any free argument registers and
26129 reg a4 contains the return address, we must move it. */
26130 if (regs_available_for_popping == 0
26131 && reg_containing_return_addr == LAST_ARG_REGNUM)
26133 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26134 reg_containing_return_addr = LR_REGNUM;
26136 else if (size > 12)
26138 /* Register a4 is being used to hold part of the return value,
26139 but we have dire need of a free, low register. */
26140 restore_a4 = TRUE;
26142 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
26145 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26147 /* The fourth argument register is available. */
26148 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26150 --pops_needed;
26154 /* Pop as many registers as we can. */
26155 thumb_pop (f, regs_available_for_popping);
26157 /* Process the registers we popped. */
26158 if (reg_containing_return_addr == -1)
26160 /* The return address was popped into the lowest numbered register. */
26161 regs_to_pop &= ~(1 << LR_REGNUM);
26163 reg_containing_return_addr =
26164 number_of_first_bit_set (regs_available_for_popping);
26166 /* Remove this register from the mask of available registers, so that
26167 the return address will not be corrupted by further pops. */
26168 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26171 /* If we popped other registers then handle them here. */
26172 if (regs_available_for_popping)
26174 int frame_pointer;
26176 /* Work out which register currently contains the frame pointer. */
26177 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26179 /* Move it into the correct place. */
26180 asm_fprintf (f, "\tmov\t%r, %r\n",
26181 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26183 /* (Temporarily) remove it from the mask of popped registers. */
26184 regs_available_for_popping &= ~(1 << frame_pointer);
26185 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26187 if (regs_available_for_popping)
26189 int stack_pointer;
26191 /* We popped the stack pointer as well;
26192 find the register that contains it. */
26193 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26195 /* Move it into the stack register. */
26196 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26198 /* At this point we have popped all necessary registers, so
26199 do not worry about restoring regs_available_for_popping
26200 to its correct value:
26202 assert (pops_needed == 0)
26203 assert (regs_available_for_popping == (1 << frame_pointer))
26204 assert (regs_to_pop == (1 << STACK_POINTER)) */
26206 else
26208 /* Since we have just moved the popped value into the frame
26209 pointer, the popping register is available for reuse, and
26210 we know that we still have the stack pointer left to pop. */
26211 regs_available_for_popping |= (1 << frame_pointer);
26215 /* If we still have registers left on the stack, but we no longer have
26216 any registers into which we can pop them, then we must move the return
26217 address into the link register and make available the register that
26218 contained it. */
26219 if (regs_available_for_popping == 0 && pops_needed > 0)
26221 regs_available_for_popping |= 1 << reg_containing_return_addr;
26223 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26224 reg_containing_return_addr);
26226 reg_containing_return_addr = LR_REGNUM;
26229 /* If we have registers left on the stack then pop some more.
26230 We know that at most we will want to pop FP and SP. */
26231 if (pops_needed > 0)
26233 int popped_into;
26234 int move_to;
26236 thumb_pop (f, regs_available_for_popping);
26238 /* We have popped either FP or SP.
26239 Move whichever one it is into the correct register. */
26240 popped_into = number_of_first_bit_set (regs_available_for_popping);
26241 move_to = number_of_first_bit_set (regs_to_pop);
26243 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26245 regs_to_pop &= ~(1 << move_to);
26247 --pops_needed;
26250 /* If we still have not popped everything then we must have only
26251 had one register available to us and we are now popping the SP. */
26252 if (pops_needed > 0)
26254 int popped_into;
26256 thumb_pop (f, regs_available_for_popping);
26258 popped_into = number_of_first_bit_set (regs_available_for_popping);
26260 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26262 assert (regs_to_pop == (1 << STACK_POINTER))
26263 assert (pops_needed == 1)
26267 /* If necessary restore the a4 register. */
26268 if (restore_a4)
26270 if (reg_containing_return_addr != LR_REGNUM)
26272 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26273 reg_containing_return_addr = LR_REGNUM;
26276 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26279 if (crtl->calls_eh_return)
26280 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26282 /* Return to caller. */
26283 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26286 /* Scan INSN just before assembler is output for it.
26287 For Thumb-1, we track the status of the condition codes; this
26288 information is used in the cbranchsi4_insn pattern. */
26289 void
26290 thumb1_final_prescan_insn (rtx insn)
26292 if (flag_print_asm_name)
26293 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26294 INSN_ADDRESSES (INSN_UID (insn)));
26295 /* Don't overwrite the previous setter when we get to a cbranch. */
26296 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26298 enum attr_conds conds;
26300 if (cfun->machine->thumb1_cc_insn)
26302 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26303 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26304 CC_STATUS_INIT;
26306 conds = get_attr_conds (insn);
26307 if (conds == CONDS_SET)
26309 rtx set = single_set (insn);
26310 cfun->machine->thumb1_cc_insn = insn;
26311 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26312 cfun->machine->thumb1_cc_op1 = const0_rtx;
26313 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26314 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26316 rtx src1 = XEXP (SET_SRC (set), 1);
26317 if (src1 == const0_rtx)
26318 cfun->machine->thumb1_cc_mode = CCmode;
26320 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26322 /* Record the src register operand instead of dest because
26323 the cprop_hardreg pass propagates src. */
26324 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26327 else if (conds != CONDS_NOCOND)
26328 cfun->machine->thumb1_cc_insn = NULL_RTX;
26331 /* Check if unexpected far jump is used. */
26332 if (cfun->machine->lr_save_eliminated
26333 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26334 internal_error("Unexpected thumb1 far jump");
26338 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26340 unsigned HOST_WIDE_INT mask = 0xff;
26341 int i;
26343 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26344 if (val == 0) /* XXX */
26345 return 0;
26347 for (i = 0; i < 25; i++)
26348 if ((val & (mask << i)) == val)
26349 return 1;
26351 return 0;
26354 /* Returns nonzero if the current function contains,
26355 or might contain a far jump. */
26356 static int
26357 thumb_far_jump_used_p (void)
26359 rtx insn;
26360 bool far_jump = false;
26361 unsigned int func_size = 0;
26363 /* This test is only important for leaf functions. */
26364 /* assert (!leaf_function_p ()); */
26366 /* If we have already decided that far jumps may be used,
26367 do not bother checking again, and always return true even if
26368 it turns out that they are not being used. Once we have made
26369 the decision that far jumps are present (and that hence the link
26370 register will be pushed onto the stack) we cannot go back on it. */
26371 if (cfun->machine->far_jump_used)
26372 return 1;
26374 /* If this function is not being called from the prologue/epilogue
26375 generation code then it must be being called from the
26376 INITIAL_ELIMINATION_OFFSET macro. */
26377 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26379 /* In this case we know that we are being asked about the elimination
26380 of the arg pointer register. If that register is not being used,
26381 then there are no arguments on the stack, and we do not have to
26382 worry that a far jump might force the prologue to push the link
26383 register, changing the stack offsets. In this case we can just
26384 return false, since the presence of far jumps in the function will
26385 not affect stack offsets.
26387 If the arg pointer is live (or if it was live, but has now been
26388 eliminated and so set to dead) then we do have to test to see if
26389 the function might contain a far jump. This test can lead to some
26390 false negatives, since before reload is completed the length of
26391 branch instructions is not known, so gcc defaults to returning their
26392 longest length, which in turn sets the far jump attribute to true.
26394 A false negative will not result in bad code being generated, but it
26395 will result in a needless push and pop of the link register. We
26396 hope that this does not occur too often.
26398 If we need doubleword stack alignment this could affect the other
26399 elimination offsets so we can't risk getting it wrong. */
26400 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26401 cfun->machine->arg_pointer_live = 1;
26402 else if (!cfun->machine->arg_pointer_live)
26403 return 0;
26406 /* We should not change far_jump_used during or after reload, as there is
26407 no chance to change stack frame layout. */
26408 if (reload_in_progress || reload_completed)
26409 return 0;
26411 /* Check to see if the function contains a branch
26412 insn with the far jump attribute set. */
26413 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26415 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26417 far_jump = true;
26419 func_size += get_attr_length (insn);
26422 /* The far_jump attribute will always be true for thumb1 before the
26423 shorten_branch pass, so checking the far_jump attribute before
26424 shorten_branch is not very useful.
26426 The following heuristic tries to estimate more accurately whether a
26427 far jump will actually be used. The heuristic is very conservative, as
26428 there is no way to roll back a decision not to use far jumps.
26430 Thumb1 long branch offsets range from -2048 to 2046. The worst case is
26431 that each 2-byte insn is associated with a 4-byte constant pool entry.
26432 Using function size 2048/3 as the threshold is conservative enough. */
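/* Arithmetic behind the threshold: func_size counts only instruction
   bytes (2 per insn).  With a worst-case 4-byte literal per insn the code
   plus constant pools span about 3 * func_size bytes, so once
   3 * func_size reaches 2048 a branch may be out of range and the
   function is treated as using far jumps.  */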
26433 if (far_jump)
26435 if ((func_size * 3) >= 2048)
26437 /* Record the fact that we have decided that
26438 the function does use far jumps. */
26439 cfun->machine->far_jump_used = 1;
26440 return 1;
26444 return 0;
26447 /* Return nonzero if FUNC must be entered in ARM mode. */
26449 is_called_in_ARM_mode (tree func)
26451 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26453 /* Ignore the problem of functions whose address is taken. */
26454 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26455 return TRUE;
26457 #ifdef ARM_PE
26458 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26459 #else
26460 return FALSE;
26461 #endif
26464 /* Given the stack offsets and register mask in OFFSETS, decide how
26465 many additional registers to push instead of subtracting a constant
26466 from SP. For epilogues the principle is the same except we use pop.
26467 FOR_PROLOGUE indicates which we're generating. */
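/* The idea: a small SP adjustment can sometimes be folded into the
   push/pop by including extra registers whose values do not matter, e.g.
   turning
	push	{r4, r7, lr}
	sub	sp, #8
   into "push {r2, r3, r4, r7, lr}".  Register choice is illustrative;
   the checks below decide when this is actually profitable.  */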
26468 static int
26469 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26471 HOST_WIDE_INT amount;
26472 unsigned long live_regs_mask = offsets->saved_regs_mask;
26473 /* Extract a mask of the ones we can give to the Thumb's push/pop
26474 instruction. */
26475 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26476 /* Then count how many other high registers will need to be pushed. */
26477 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26478 int n_free, reg_base, size;
26480 if (!for_prologue && frame_pointer_needed)
26481 amount = offsets->locals_base - offsets->saved_regs;
26482 else
26483 amount = offsets->outgoing_args - offsets->saved_regs;
26485 /* If the stack frame size is 512 exactly, we can save one load
26486 instruction, which should make this a win even when optimizing
26487 for speed. */
26488 if (!optimize_size && amount != 512)
26489 return 0;
26491 /* Can't do this if there are high registers to push. */
26492 if (high_regs_pushed != 0)
26493 return 0;
26495 /* Shouldn't do it in the prologue if no registers would normally
26496 be pushed at all. In the epilogue, also allow it if we'll have
26497 a pop insn for the PC. */
26498 if (l_mask == 0
26499 && (for_prologue
26500 || TARGET_BACKTRACE
26501 || (live_regs_mask & 1 << LR_REGNUM) == 0
26502 || TARGET_INTERWORK
26503 || crtl->args.pretend_args_size != 0))
26504 return 0;
26506 /* Don't do this if thumb_expand_prologue wants to emit instructions
26507 between the push and the stack frame allocation. */
26508 if (for_prologue
26509 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26510 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26511 return 0;
26513 reg_base = 0;
26514 n_free = 0;
26515 if (!for_prologue)
26517 size = arm_size_return_regs ();
26518 reg_base = ARM_NUM_INTS (size);
26519 live_regs_mask >>= reg_base;
26522 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26523 && (for_prologue || call_used_regs[reg_base + n_free]))
26525 live_regs_mask >>= 1;
26526 n_free++;
26529 if (n_free == 0)
26530 return 0;
26531 gcc_assert (amount / 4 * 4 == amount);
26533 if (amount >= 512 && (amount - n_free * 4) < 512)
26534 return (amount - 508) / 4;
26535 if (amount <= n_free * 4)
26536 return amount / 4;
26537 return 0;
26540 /* The bits which aren't usefully expanded as rtl. */
26541 const char *
26542 thumb1_unexpanded_epilogue (void)
26544 arm_stack_offsets *offsets;
26545 int regno;
26546 unsigned long live_regs_mask = 0;
26547 int high_regs_pushed = 0;
26548 int extra_pop;
26549 int had_to_push_lr;
26550 int size;
26552 if (cfun->machine->return_used_this_function != 0)
26553 return "";
26555 if (IS_NAKED (arm_current_func_type ()))
26556 return "";
26558 offsets = arm_get_frame_offsets ();
26559 live_regs_mask = offsets->saved_regs_mask;
26560 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26562 /* We can deduce the registers used from the function's return value.
26563 This is more reliable than examining df_regs_ever_live_p () because that
26564 will be set if the register is ever used in the function, not just if
26565 the register is used to hold a return value. */
26566 size = arm_size_return_regs ();
26568 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26569 if (extra_pop > 0)
26571 unsigned long extra_mask = (1 << extra_pop) - 1;
26572 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26575 /* The prolog may have pushed some high registers to use as
26576 work registers. e.g. the testsuite file:
26577 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26578 compiles to produce:
26579 push {r4, r5, r6, r7, lr}
26580 mov r7, r9
26581 mov r6, r8
26582 push {r6, r7}
26583 as part of the prolog. We have to undo that pushing here. */
26585 if (high_regs_pushed)
26587 unsigned long mask = live_regs_mask & 0xff;
26588 int next_hi_reg;
26590 /* The available low registers depend on the size of the value we are
26591 returning. */
26592 if (size <= 12)
26593 mask |= 1 << 3;
26594 if (size <= 8)
26595 mask |= 1 << 2;
26597 if (mask == 0)
26598 /* Oh dear! We have no low registers into which we can pop
26599 high registers! */
26600 internal_error
26601 ("no low registers available for popping high registers");
26603 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26604 if (live_regs_mask & (1 << next_hi_reg))
26605 break;
26607 while (high_regs_pushed)
26609 /* Find lo register(s) into which the high register(s) can
26610 be popped. */
26611 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26613 if (mask & (1 << regno))
26614 high_regs_pushed--;
26615 if (high_regs_pushed == 0)
26616 break;
26619 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26621 /* Pop the values into the low register(s). */
26622 thumb_pop (asm_out_file, mask);
26624 /* Move the value(s) into the high registers. */
26625 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26627 if (mask & (1 << regno))
26629 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26630 regno);
26632 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26633 if (live_regs_mask & (1 << next_hi_reg))
26634 break;
26638 live_regs_mask &= ~0x0f00;
26641 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26642 live_regs_mask &= 0xff;
26644 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26646 /* Pop the return address into the PC. */
26647 if (had_to_push_lr)
26648 live_regs_mask |= 1 << PC_REGNUM;
26650 /* Either no argument registers were pushed or a backtrace
26651 structure was created which includes an adjusted stack
26652 pointer, so just pop everything. */
26653 if (live_regs_mask)
26654 thumb_pop (asm_out_file, live_regs_mask);
26656 /* We have either just popped the return address into the
26657 PC or it was kept in LR for the entire function.
26658 Note that thumb_pop has already called thumb_exit if the
26659 PC was in the list. */
26660 if (!had_to_push_lr)
26661 thumb_exit (asm_out_file, LR_REGNUM);
26663 else
26665 /* Pop everything but the return address. */
26666 if (live_regs_mask)
26667 thumb_pop (asm_out_file, live_regs_mask);
26669 if (had_to_push_lr)
26671 if (size > 12)
26673 /* We have no free low regs, so save one. */
26674 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26675 LAST_ARG_REGNUM);
26678 /* Get the return address into a temporary register. */
26679 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26681 if (size > 12)
26683 /* Move the return address to lr. */
26684 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26685 LAST_ARG_REGNUM);
26686 /* Restore the low register. */
26687 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26688 IP_REGNUM);
26689 regno = LR_REGNUM;
26691 else
26692 regno = LAST_ARG_REGNUM;
26694 else
26695 regno = LR_REGNUM;
26697 /* Remove the argument registers that were pushed onto the stack. */
26698 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26699 SP_REGNUM, SP_REGNUM,
26700 crtl->args.pretend_args_size);
26702 thumb_exit (asm_out_file, regno);
26705 return "";
26708 /* Functions to save and restore machine-specific function data. */
26709 static struct machine_function *
26710 arm_init_machine_status (void)
26712 struct machine_function *machine;
26713 machine = ggc_cleared_alloc<machine_function> ();
26715 #if ARM_FT_UNKNOWN != 0
26716 machine->func_type = ARM_FT_UNKNOWN;
26717 #endif
26718 return machine;
26721 /* Return an RTX indicating where the return address to the
26722 calling function can be found. */
26724 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26726 if (count != 0)
26727 return NULL_RTX;
26729 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26732 /* Do anything needed before RTL is emitted for each function. */
26733 void
26734 arm_init_expanders (void)
26736 /* Arrange to initialize and mark the machine per-function status. */
26737 init_machine_status = arm_init_machine_status;
26739 /* This is to stop the combine pass optimizing away the alignment
26740 adjustment of va_arg. */
26741 /* ??? It is claimed that this should not be necessary. */
26742 if (cfun)
26743 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26747 /* Like arm_compute_initial_elimination_offset. Simpler because there
26748 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
26749 to point at the base of the local variables after static stack
26750 space for a function has been allocated. */
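/* Each result below is just the difference of two fields of the
   arm_stack_offsets structure; for instance, eliminating
   ARG_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   offsets->outgoing_args - offsets->saved_args.  */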
26752 HOST_WIDE_INT
26753 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26755 arm_stack_offsets *offsets;
26757 offsets = arm_get_frame_offsets ();
26759 switch (from)
26761 case ARG_POINTER_REGNUM:
26762 switch (to)
26764 case STACK_POINTER_REGNUM:
26765 return offsets->outgoing_args - offsets->saved_args;
26767 case FRAME_POINTER_REGNUM:
26768 return offsets->soft_frame - offsets->saved_args;
26770 case ARM_HARD_FRAME_POINTER_REGNUM:
26771 return offsets->saved_regs - offsets->saved_args;
26773 case THUMB_HARD_FRAME_POINTER_REGNUM:
26774 return offsets->locals_base - offsets->saved_args;
26776 default:
26777 gcc_unreachable ();
26779 break;
26781 case FRAME_POINTER_REGNUM:
26782 switch (to)
26784 case STACK_POINTER_REGNUM:
26785 return offsets->outgoing_args - offsets->soft_frame;
26787 case ARM_HARD_FRAME_POINTER_REGNUM:
26788 return offsets->saved_regs - offsets->soft_frame;
26790 case THUMB_HARD_FRAME_POINTER_REGNUM:
26791 return offsets->locals_base - offsets->soft_frame;
26793 default:
26794 gcc_unreachable ();
26796 break;
26798 default:
26799 gcc_unreachable ();
26803 /* Generate the function's prologue. */
26805 void
26806 thumb1_expand_prologue (void)
26808 rtx insn;
26810 HOST_WIDE_INT amount;
26811 arm_stack_offsets *offsets;
26812 unsigned long func_type;
26813 int regno;
26814 unsigned long live_regs_mask;
26815 unsigned long l_mask;
26816 unsigned high_regs_pushed = 0;
26818 func_type = arm_current_func_type ();
26820 /* Naked functions don't have prologues. */
26821 if (IS_NAKED (func_type))
26822 return;
26824 if (IS_INTERRUPT (func_type))
26826 error ("interrupt Service Routines cannot be coded in Thumb mode");
26827 return;
26830 if (is_called_in_ARM_mode (current_function_decl))
26831 emit_insn (gen_prologue_thumb1_interwork ());
26833 offsets = arm_get_frame_offsets ();
26834 live_regs_mask = offsets->saved_regs_mask;
26836 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26837 l_mask = live_regs_mask & 0x40ff;
26838 /* Then count how many other high registers will need to be pushed. */
26839 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
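/* 0x40ff selects r0-r7 plus LR (bit 14), the registers a Thumb-1 PUSH can
   handle directly; 0x0f00 selects r8-r11, which have to be staged through
   low registers before they can be pushed. */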
26841 if (crtl->args.pretend_args_size)
26843 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26845 if (cfun->machine->uses_anonymous_args)
26847 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26848 unsigned long mask;
26850 mask = 1ul << (LAST_ARG_REGNUM + 1);
26851 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
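/* MASK now covers the top NUM_PUSHES argument registers ending at r3
   (LAST_ARG_REGNUM); e.g. an 8-byte pretend area gives num_pushes == 2
   and a push of {r2, r3}. */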
26853 insn = thumb1_emit_multi_reg_push (mask, 0);
26855 else
26857 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26858 stack_pointer_rtx, x));
26860 RTX_FRAME_RELATED_P (insn) = 1;
26863 if (TARGET_BACKTRACE)
26865 HOST_WIDE_INT offset = 0;
26866 unsigned work_register;
26867 rtx work_reg, x, arm_hfp_rtx;
26869 /* We have been asked to create a stack backtrace structure.
26870 The code looks like this:
26872 0 .align 2
26873 0 func:
26874 0 sub SP, #16 Reserve space for 4 registers.
26875 2 push {R7} Push low registers.
26876 4 add R7, SP, #20 Get the stack pointer before the push.
26877 6 str R7, [SP, #8] Store the stack pointer
26878 (before reserving the space).
26879 8 mov R7, PC Get hold of the start of this code + 12.
26880 10 str R7, [SP, #16] Store it.
26881 12 mov R7, FP Get hold of the current frame pointer.
26882 14 str R7, [SP, #4] Store it.
26883 16 mov R7, LR Get hold of the current return address.
26884 18 str R7, [SP, #12] Store it.
26885 20 add R7, SP, #16 Point at the start of the
26886 backtrace structure.
26887 22 mov FP, R7 Put this value into the frame pointer. */
26889 work_register = thumb_find_work_register (live_regs_mask);
26890 work_reg = gen_rtx_REG (SImode, work_register);
26891 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26893 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26894 stack_pointer_rtx, GEN_INT (-16)));
26895 RTX_FRAME_RELATED_P (insn) = 1;
26897 if (l_mask)
26899 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26900 RTX_FRAME_RELATED_P (insn) = 1;
26902 offset = bit_count (l_mask) * UNITS_PER_WORD;
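/* OFFSET is the size of the low-register push just emitted, i.e. how far
   SP now sits below the 16-byte backtrace block, so the stores below add
   it back in when addressing the block's slots. */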
26905 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26906 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26908 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26909 x = gen_frame_mem (SImode, x);
26910 emit_move_insn (x, work_reg);
26912 /* Make sure that the instruction fetching the PC is in the right place
26913 to calculate "start of backtrace creation code + 12". */
26914 /* ??? The stores using the common WORK_REG ought to be enough to
26915 prevent the scheduler from doing anything weird. Failing that
26916 we could always move all of the following into an UNSPEC_VOLATILE. */
26917 if (l_mask)
26919 x = gen_rtx_REG (SImode, PC_REGNUM);
26920 emit_move_insn (work_reg, x);
26922 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26923 x = gen_frame_mem (SImode, x);
26924 emit_move_insn (x, work_reg);
26926 emit_move_insn (work_reg, arm_hfp_rtx);
26928 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26929 x = gen_frame_mem (SImode, x);
26930 emit_move_insn (x, work_reg);
26932 else
26934 emit_move_insn (work_reg, arm_hfp_rtx);
26936 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26937 x = gen_frame_mem (SImode, x);
26938 emit_move_insn (x, work_reg);
26940 x = gen_rtx_REG (SImode, PC_REGNUM);
26941 emit_move_insn (work_reg, x);
26943 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26944 x = gen_frame_mem (SImode, x);
26945 emit_move_insn (x, work_reg);
26948 x = gen_rtx_REG (SImode, LR_REGNUM);
26949 emit_move_insn (work_reg, x);
26951 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26952 x = gen_frame_mem (SImode, x);
26953 emit_move_insn (x, work_reg);
26955 x = GEN_INT (offset + 12);
26956 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26958 emit_move_insn (arm_hfp_rtx, work_reg);
26960 /* Optimization: If we are not pushing any low registers but we are going
26961 to push some high registers then delay our first push. This will just
26962 be a push of LR and we can combine it with the push of the first high
26963 register. */
26964 else if ((l_mask & 0xff) != 0
26965 || (high_regs_pushed == 0 && l_mask))
26967 unsigned long mask = l_mask;
26968 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
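/* thumb1_extra_regs_pushed returns how many extra registers can be pushed
   purely to perform part of the stack decrement; (1 << n) - 1 turns that
   count into a mask of r0..r(n-1), which are folded into this push. */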
26969 insn = thumb1_emit_multi_reg_push (mask, mask);
26970 RTX_FRAME_RELATED_P (insn) = 1;
26973 if (high_regs_pushed)
26975 unsigned pushable_regs;
26976 unsigned next_hi_reg;
26977 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26978 : crtl->args.info.nregs;
26979 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26981 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26982 if (live_regs_mask & (1 << next_hi_reg))
26983 break;
26985 /* Mask out the registers used for passing arguments, even when they
26986 could otherwise be pushed: using them to stash the high registers
26987 could clobber live argument values. */
26988 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
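/* Only low registers qualify, and only ones that the pushes above have
   already saved, so their current contents are free to be overwritten. */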
26990 if (pushable_regs == 0)
26991 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26993 while (high_regs_pushed > 0)
26995 unsigned long real_regs_mask = 0;
26997 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
26999 if (pushable_regs & (1 << regno))
27001 emit_move_insn (gen_rtx_REG (SImode, regno),
27002 gen_rtx_REG (SImode, next_hi_reg));
27004 high_regs_pushed --;
27005 real_regs_mask |= (1 << next_hi_reg);
27007 if (high_regs_pushed)
27009 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
27010 next_hi_reg --)
27011 if (live_regs_mask & (1 << next_hi_reg))
27012 break;
27014 else
27016 pushable_regs &= ~((1 << regno) - 1);
27017 break;
27022 /* If we had to find a work register and we have not yet
27023 saved the LR then add it to the list of regs to push. */
27024 if (l_mask == (1 << LR_REGNUM))
27026 pushable_regs |= l_mask;
27027 real_regs_mask |= l_mask;
27028 l_mask = 0;
27031 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
27032 RTX_FRAME_RELATED_P (insn) = 1;
27036 /* Load the pic register before setting the frame pointer,
27037 so we can use r7 as a temporary work register. */
27038 if (flag_pic && arm_pic_register != INVALID_REGNUM)
27039 arm_load_pic_register (live_regs_mask);
27041 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
27042 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
27043 stack_pointer_rtx);
27045 if (flag_stack_usage_info)
27046 current_function_static_stack_size
27047 = offsets->outgoing_args - offsets->saved_args;
27049 amount = offsets->outgoing_args - offsets->saved_regs;
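/* Any registers pushed purely to perform part of the stack decrement (see
   thumb1_extra_regs_pushed above) have already moved SP down by 4 bytes
   each, so take that off the explicit adjustment. */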
27050 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
27051 if (amount)
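/* A Thumb-1 SUB SP, #imm encodes a 7-bit immediate scaled by 4 (0..508),
   so any word-aligned adjustment below 512 fits in a single instruction. */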
27053 if (amount < 512)
27055 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27056 GEN_INT (- amount)));
27057 RTX_FRAME_RELATED_P (insn) = 1;
27059 else
27061 rtx reg, dwarf;
27063 /* The stack decrement is too big for an immediate value in a single
27064 insn. In theory we could issue multiple subtracts, but after
27065 three of them it becomes more space efficient to place the full
27066 value in the constant pool and load into a register. (Also the
27067 ARM debugger really likes to see only one stack decrement per
27068 function). So instead we look for a scratch register into which
27069 we can load the decrement, and then we subtract this from the
27070 stack pointer. Unfortunately on the thumb the only available
27071 scratch registers are the argument registers, and we cannot use
27072 these as they may hold arguments to the function. Instead we
27073 attempt to locate a call preserved register which is used by this
27074 function. If we can find one, then we know that it will have
27075 been pushed at the start of the prologue and so we can corrupt
27076 it now. */
27077 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
27078 if (live_regs_mask & (1 << regno))
27079 break;
27081 gcc_assert (regno <= LAST_LO_REGNUM);
27083 reg = gen_rtx_REG (SImode, regno);
27085 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
27087 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27088 stack_pointer_rtx, reg));
27090 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
27091 plus_constant (Pmode, stack_pointer_rtx,
27092 -amount));
27093 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
27094 RTX_FRAME_RELATED_P (insn) = 1;
27098 if (frame_pointer_needed)
27099 thumb_set_frame_pointer (offsets);
27101 /* If we are profiling, make sure no instructions are scheduled before
27102 the call to mcount. Similarly if the user has requested no
27103 scheduling in the prolog. Similarly if we want non-call exceptions
27104 using the EABI unwinder, to prevent faulting instructions from being
27105 swapped with a stack adjustment. */
27106 if (crtl->profile || !TARGET_SCHED_PROLOG
27107 || (arm_except_unwind_info (&global_options) == UI_TARGET
27108 && cfun->can_throw_non_call_exceptions))
27109 emit_insn (gen_blockage ());
27111 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
27112 if (live_regs_mask & 0xff)
27113 cfun->machine->lr_save_eliminated = 0;
27116 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a single
27117 POP instruction can be generated. LR should be replaced by PC. All
27118 the checks required are already done by USE_RETURN_INSN (). Hence,
27119 all we really need to check here is whether a single register or
27120 multiple registers are to be popped. */
27121 void
27122 thumb2_expand_return (bool simple_return)
27124 int i, num_regs;
27125 unsigned long saved_regs_mask;
27126 arm_stack_offsets *offsets;
27128 offsets = arm_get_frame_offsets ();
27129 saved_regs_mask = offsets->saved_regs_mask;
27131 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27132 if (saved_regs_mask & (1 << i))
27133 num_regs++;
27135 if (!simple_return && saved_regs_mask)
27137 if (num_regs == 1)
27139 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27140 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27141 rtx addr = gen_rtx_MEM (SImode,
27142 gen_rtx_POST_INC (SImode,
27143 stack_pointer_rtx));
27144 set_mem_alias_set (addr, get_frame_alias_set ());
27145 XVECEXP (par, 0, 0) = ret_rtx;
27146 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27147 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27148 emit_jump_insn (par);
27150 else
27152 saved_regs_mask &= ~ (1 << LR_REGNUM);
27153 saved_regs_mask |= (1 << PC_REGNUM);
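/* Popping the value saved for LR straight into PC makes the pop itself
   perform the return. */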
27154 arm_emit_multi_reg_pop (saved_regs_mask);
27157 else
27159 emit_jump_insn (simple_return_rtx);
27163 void
27164 thumb1_expand_epilogue (void)
27166 HOST_WIDE_INT amount;
27167 arm_stack_offsets *offsets;
27168 int regno;
27170 /* Naked functions don't have epilogues. */
27171 if (IS_NAKED (arm_current_func_type ()))
27172 return;
27174 offsets = arm_get_frame_offsets ();
27175 amount = offsets->outgoing_args - offsets->saved_regs;
27177 if (frame_pointer_needed)
27179 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27180 amount = offsets->locals_base - offsets->saved_regs;
27182 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27184 gcc_assert (amount >= 0);
27185 if (amount)
27187 emit_insn (gen_blockage ());
27189 if (amount < 512)
27190 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27191 GEN_INT (amount)));
27192 else
27194 /* r3 is always free in the epilogue. */
27195 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27197 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27198 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27202 /* Emit a USE (stack_pointer_rtx), so that
27203 the stack adjustment will not be deleted. */
27204 emit_insn (gen_force_register_use (stack_pointer_rtx));
27206 if (crtl->profile || !TARGET_SCHED_PROLOG)
27207 emit_insn (gen_blockage ());
27209 /* Emit a clobber for each insn that will be restored in the epilogue,
27210 so that flow2 will get register lifetimes correct. */
27211 for (regno = 0; regno < 13; regno++)
27212 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27213 emit_clobber (gen_rtx_REG (SImode, regno));
27215 if (! df_regs_ever_live_p (LR_REGNUM))
27216 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27219 /* Epilogue code for APCS frame. */
27220 static void
27221 arm_expand_epilogue_apcs_frame (bool really_return)
27223 unsigned long func_type;
27224 unsigned long saved_regs_mask;
27225 int num_regs = 0;
27226 int i;
27227 int floats_from_frame = 0;
27228 arm_stack_offsets *offsets;
27230 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27231 func_type = arm_current_func_type ();
27233 /* Get frame offsets for ARM. */
27234 offsets = arm_get_frame_offsets ();
27235 saved_regs_mask = offsets->saved_regs_mask;
27237 /* Find the offset of the floating-point save area in the frame. */
27238 floats_from_frame
27239 = (offsets->saved_args
27240 + arm_compute_static_chain_stack_bytes ()
27241 - offsets->frame);
27243 /* Compute how many core registers are saved and how far away the floats are. */
27244 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27245 if (saved_regs_mask & (1 << i))
27247 num_regs++;
27248 floats_from_frame += 4;
27251 if (TARGET_HARD_FLOAT && TARGET_VFP)
27253 int start_reg;
27254 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27256 /* The offset is from IP_REGNUM. */
27257 int saved_size = arm_get_vfp_saved_size ();
27258 if (saved_size > 0)
27260 rtx insn;
27261 floats_from_frame += saved_size;
27262 insn = emit_insn (gen_addsi3 (ip_rtx,
27263 hard_frame_pointer_rtx,
27264 GEN_INT (-floats_from_frame)));
27265 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27266 ip_rtx, hard_frame_pointer_rtx);
27269 /* Generate VFP register multi-pop. */
27270 start_reg = FIRST_VFP_REGNUM;
27272 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27273 /* Look for a case where a reg does not need restoring. */
27274 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27275 && (!df_regs_ever_live_p (i + 1)
27276 || call_used_regs[i + 1]))
27278 if (start_reg != i)
27279 arm_emit_vfp_multi_reg_pop (start_reg,
27280 (i - start_reg) / 2,
27281 gen_rtx_REG (SImode,
27282 IP_REGNUM));
27283 start_reg = i + 2;
27286 /* Restore the remaining regs that we have discovered (or possibly
27287 even all of them, if the conditional in the for loop never
27288 fired). */
27289 if (start_reg != i)
27290 arm_emit_vfp_multi_reg_pop (start_reg,
27291 (i - start_reg) / 2,
27292 gen_rtx_REG (SImode, IP_REGNUM));
27295 if (TARGET_IWMMXT)
27297 /* The frame pointer is guaranteed to be non-double-word aligned, as
27298 it is set to double-word-aligned old_stack_pointer - 4. */
27299 rtx insn;
27300 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27302 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27303 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27305 rtx addr = gen_frame_mem (V2SImode,
27306 plus_constant (Pmode, hard_frame_pointer_rtx,
27307 - lrm_count * 4));
27308 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27309 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27310 gen_rtx_REG (V2SImode, i),
27311 NULL_RTX);
27312 lrm_count += 2;
27316 /* saved_regs_mask should contain IP, which holds the old stack pointer
27317 from the time the activation frame was created. Since SP and IP are
27318 adjacent registers, we can restore the value directly into SP. */
27319 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27320 saved_regs_mask &= ~(1 << IP_REGNUM);
27321 saved_regs_mask |= (1 << SP_REGNUM);
27323 /* There are two registers left in saved_regs_mask - LR and PC. We
27324 only need to restore LR (the return address), but to
27325 save time we can load it directly into PC, unless we need a
27326 special function exit sequence, or we are not really returning. */
27327 if (really_return
27328 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27329 && !crtl->calls_eh_return)
27330 /* Delete LR from the register mask, so that the LR value saved on
27331 the stack is loaded into the PC by the register pop. */
27332 saved_regs_mask &= ~(1 << LR_REGNUM);
27333 else
27334 saved_regs_mask &= ~(1 << PC_REGNUM);
27336 num_regs = bit_count (saved_regs_mask);
27337 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27339 rtx insn;
27340 emit_insn (gen_blockage ());
27341 /* Unwind the stack to just below the saved registers. */
27342 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27343 hard_frame_pointer_rtx,
27344 GEN_INT (- 4 * num_regs)));
27346 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27347 stack_pointer_rtx, hard_frame_pointer_rtx);
27350 arm_emit_multi_reg_pop (saved_regs_mask);
27352 if (IS_INTERRUPT (func_type))
27354 /* Interrupt handlers will have pushed the
27355 IP onto the stack, so restore it now. */
27356 rtx insn;
27357 rtx addr = gen_rtx_MEM (SImode,
27358 gen_rtx_POST_INC (SImode,
27359 stack_pointer_rtx));
27360 set_mem_alias_set (addr, get_frame_alias_set ());
27361 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27362 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27363 gen_rtx_REG (SImode, IP_REGNUM),
27364 NULL_RTX);
27367 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27368 return;
27370 if (crtl->calls_eh_return)
27371 emit_insn (gen_addsi3 (stack_pointer_rtx,
27372 stack_pointer_rtx,
27373 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27375 if (IS_STACKALIGN (func_type))
27376 /* Restore the original stack pointer. Before prologue, the stack was
27377 realigned and the original stack pointer saved in r0. For details,
27378 see comment in arm_expand_prologue. */
27379 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27381 emit_jump_insn (simple_return_rtx);
27384 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27385 function is not a sibcall. */
27386 void
27387 arm_expand_epilogue (bool really_return)
27389 unsigned long func_type;
27390 unsigned long saved_regs_mask;
27391 int num_regs = 0;
27392 int i;
27393 int amount;
27394 arm_stack_offsets *offsets;
27396 func_type = arm_current_func_type ();
27398 /* Naked functions don't have epilogues. Hence, generate the return pattern
27399 and let output_return_instruction take care of any instruction emission. */
27400 if (IS_NAKED (func_type)
27401 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27403 if (really_return)
27404 emit_jump_insn (simple_return_rtx);
27405 return;
27408 /* If we are throwing an exception, then we really must be doing a
27409 return, so we can't tail-call. */
27410 gcc_assert (!crtl->calls_eh_return || really_return);
27412 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27414 arm_expand_epilogue_apcs_frame (really_return);
27415 return;
27418 /* Get frame offsets for ARM. */
27419 offsets = arm_get_frame_offsets ();
27420 saved_regs_mask = offsets->saved_regs_mask;
27421 num_regs = bit_count (saved_regs_mask);
27423 if (frame_pointer_needed)
27425 rtx insn;
27426 /* Restore stack pointer if necessary. */
27427 if (TARGET_ARM)
27429 /* In ARM mode, frame pointer points to first saved register.
27430 Restore stack pointer to last saved register. */
27431 amount = offsets->frame - offsets->saved_regs;
27433 /* Force out any pending memory operations that reference stacked data
27434 before stack de-allocation occurs. */
27435 emit_insn (gen_blockage ());
27436 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27437 hard_frame_pointer_rtx,
27438 GEN_INT (amount)));
27439 arm_add_cfa_adjust_cfa_note (insn, amount,
27440 stack_pointer_rtx,
27441 hard_frame_pointer_rtx);
27443 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27444 deleted. */
27445 emit_insn (gen_force_register_use (stack_pointer_rtx));
27447 else
27449 /* In Thumb-2 mode, the frame pointer points to the last saved
27450 register. */
27451 amount = offsets->locals_base - offsets->saved_regs;
27452 if (amount)
27454 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27455 hard_frame_pointer_rtx,
27456 GEN_INT (amount)));
27457 arm_add_cfa_adjust_cfa_note (insn, amount,
27458 hard_frame_pointer_rtx,
27459 hard_frame_pointer_rtx);
27462 /* Force out any pending memory operations that reference stacked data
27463 before stack de-allocation occurs. */
27464 emit_insn (gen_blockage ());
27465 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27466 hard_frame_pointer_rtx));
27467 arm_add_cfa_adjust_cfa_note (insn, 0,
27468 stack_pointer_rtx,
27469 hard_frame_pointer_rtx);
27470 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27471 deleted. */
27472 emit_insn (gen_force_register_use (stack_pointer_rtx));
27475 else
27477 /* Pop off outgoing args and local frame to adjust stack pointer to
27478 last saved register. */
27479 amount = offsets->outgoing_args - offsets->saved_regs;
27480 if (amount)
27482 rtx tmp;
27483 /* Force out any pending memory operations that reference stacked data
27484 before stack de-allocation occurs. */
27485 emit_insn (gen_blockage ());
27486 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27487 stack_pointer_rtx,
27488 GEN_INT (amount)));
27489 arm_add_cfa_adjust_cfa_note (tmp, amount,
27490 stack_pointer_rtx, stack_pointer_rtx);
27491 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27492 not deleted. */
27493 emit_insn (gen_force_register_use (stack_pointer_rtx));
27497 if (TARGET_HARD_FLOAT && TARGET_VFP)
27499 /* Generate VFP register multi-pop. */
27500 int end_reg = LAST_VFP_REGNUM + 1;
27502 /* Scan the registers in reverse order. We need to match
27503 any groupings made in the prologue and generate matching
27504 vldm operations. The need to match groups is because,
27505 unlike pop, vldm can only do consecutive regs. */
27506 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27507 /* Look for a case where a reg does not need restoring. */
27508 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27509 && (!df_regs_ever_live_p (i + 1)
27510 || call_used_regs[i + 1]))
27512 /* Restore the regs discovered so far (from reg+2 to
27513 end_reg). */
27514 if (end_reg > i + 2)
27515 arm_emit_vfp_multi_reg_pop (i + 2,
27516 (end_reg - (i + 2)) / 2,
27517 stack_pointer_rtx);
27518 end_reg = i;
27521 /* Restore the remaining regs that we have discovered (or possibly
27522 even all of them, if the conditional in the for loop never
27523 fired). */
27524 if (end_reg > i + 2)
27525 arm_emit_vfp_multi_reg_pop (i + 2,
27526 (end_reg - (i + 2)) / 2,
27527 stack_pointer_rtx);
27530 if (TARGET_IWMMXT)
27531 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27532 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27534 rtx insn;
27535 rtx addr = gen_rtx_MEM (V2SImode,
27536 gen_rtx_POST_INC (SImode,
27537 stack_pointer_rtx));
27538 set_mem_alias_set (addr, get_frame_alias_set ());
27539 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27540 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27541 gen_rtx_REG (V2SImode, i),
27542 NULL_RTX);
27543 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27544 stack_pointer_rtx, stack_pointer_rtx);
27547 if (saved_regs_mask)
27549 rtx insn;
27550 bool return_in_pc = false;
27552 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27553 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27554 && !IS_STACKALIGN (func_type)
27555 && really_return
27556 && crtl->args.pretend_args_size == 0
27557 && saved_regs_mask & (1 << LR_REGNUM)
27558 && !crtl->calls_eh_return)
27560 saved_regs_mask &= ~(1 << LR_REGNUM);
27561 saved_regs_mask |= (1 << PC_REGNUM);
27562 return_in_pc = true;
27565 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27567 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27568 if (saved_regs_mask & (1 << i))
27570 rtx addr = gen_rtx_MEM (SImode,
27571 gen_rtx_POST_INC (SImode,
27572 stack_pointer_rtx));
27573 set_mem_alias_set (addr, get_frame_alias_set ());
27575 if (i == PC_REGNUM)
27577 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27578 XVECEXP (insn, 0, 0) = ret_rtx;
27579 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27580 gen_rtx_REG (SImode, i),
27581 addr);
27582 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27583 insn = emit_jump_insn (insn);
27585 else
27587 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27588 addr));
27589 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27590 gen_rtx_REG (SImode, i),
27591 NULL_RTX);
27592 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27593 stack_pointer_rtx,
27594 stack_pointer_rtx);
27598 else
27600 if (TARGET_LDRD
27601 && current_tune->prefer_ldrd_strd
27602 && !optimize_function_for_size_p (cfun))
27604 if (TARGET_THUMB2)
27605 thumb2_emit_ldrd_pop (saved_regs_mask);
27606 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27607 arm_emit_ldrd_pop (saved_regs_mask);
27608 else
27609 arm_emit_multi_reg_pop (saved_regs_mask);
27611 else
27612 arm_emit_multi_reg_pop (saved_regs_mask);
27615 if (return_in_pc == true)
27616 return;
27619 if (crtl->args.pretend_args_size)
27621 int i, j;
27622 rtx dwarf = NULL_RTX;
27623 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27624 stack_pointer_rtx,
27625 GEN_INT (crtl->args.pretend_args_size)));
27627 RTX_FRAME_RELATED_P (tmp) = 1;
27629 if (cfun->machine->uses_anonymous_args)
27631 /* Restore the pretend args. See arm_expand_prologue for how the
27632 pretend args are saved on the stack. */
27633 int num_regs = crtl->args.pretend_args_size / 4;
27634 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
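/* (0xf0 >> num_regs) & 0xf selects the highest NUM_REGS argument registers;
   e.g. two pretend words give 0xc, i.e. {r2, r3}, matching the registers
   the prologue stored. */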
27635 for (j = 0, i = 0; j < num_regs; i++)
27636 if (saved_regs_mask & (1 << i))
27638 rtx reg = gen_rtx_REG (SImode, i);
27639 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27640 j++;
27642 REG_NOTES (tmp) = dwarf;
27644 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27645 stack_pointer_rtx, stack_pointer_rtx);
27648 if (!really_return)
27649 return;
27651 if (crtl->calls_eh_return)
27652 emit_insn (gen_addsi3 (stack_pointer_rtx,
27653 stack_pointer_rtx,
27654 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27656 if (IS_STACKALIGN (func_type))
27657 /* Restore the original stack pointer. Before prologue, the stack was
27658 realigned and the original stack pointer saved in r0. For details,
27659 see comment in arm_expand_prologue. */
27660 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27662 emit_jump_insn (simple_return_rtx);
27665 /* Implementation of insn prologue_thumb1_interwork. This is the first
27666 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27668 const char *
27669 thumb1_output_interwork (void)
27671 const char * name;
27672 FILE *f = asm_out_file;
27674 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27675 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27676 == SYMBOL_REF);
27677 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27679 /* Generate code sequence to switch us into Thumb mode. */
27680 /* The .code 32 directive has already been emitted by
27681 ASM_DECLARE_FUNCTION_NAME. */
27682 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27683 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27685 /* Generate a label, so that the debugger will notice the
27686 change in instruction sets. This label is also used by
27687 the assembler to bypass the ARM code when this function
27688 is called from a Thumb encoded function elsewhere in the
27689 same file. Hence the definition of STUB_NAME here must
27690 agree with the definition in gas/config/tc-arm.c. */
27692 #define STUB_NAME ".real_start_of"
27694 fprintf (f, "\t.code\t16\n");
27695 #ifdef ARM_PE
27696 if (arm_dllexport_name_p (name))
27697 name = arm_strip_name_encoding (name);
27698 #endif
27699 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27700 fprintf (f, "\t.thumb_func\n");
27701 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27703 return "";
27706 /* Handle the case of a double word load into a low register from
27707 a computed memory address. The computed address may involve a
27708 register which is overwritten by the load. */
27709 const char *
27710 thumb_load_double_from_address (rtx *operands)
27712 rtx addr;
27713 rtx base;
27714 rtx offset;
27715 rtx arg1;
27716 rtx arg2;
27718 gcc_assert (REG_P (operands[0]));
27719 gcc_assert (MEM_P (operands[1]));
27721 /* Get the memory address. */
27722 addr = XEXP (operands[1], 0);
27724 /* Work out how the memory address is computed. */
27725 switch (GET_CODE (addr))
27727 case REG:
27728 operands[2] = adjust_address (operands[1], SImode, 4);
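/* If the destination register overlaps the address register, load the high
   word first so the address is still intact for the second load. */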
27730 if (REGNO (operands[0]) == REGNO (addr))
27732 output_asm_insn ("ldr\t%H0, %2", operands);
27733 output_asm_insn ("ldr\t%0, %1", operands);
27735 else
27737 output_asm_insn ("ldr\t%0, %1", operands);
27738 output_asm_insn ("ldr\t%H0, %2", operands);
27740 break;
27742 case CONST:
27743 /* Compute <address> + 4 for the high order load. */
27744 operands[2] = adjust_address (operands[1], SImode, 4);
27746 output_asm_insn ("ldr\t%0, %1", operands);
27747 output_asm_insn ("ldr\t%H0, %2", operands);
27748 break;
27750 case PLUS:
27751 arg1 = XEXP (addr, 0);
27752 arg2 = XEXP (addr, 1);
27754 if (CONSTANT_P (arg1))
27755 base = arg2, offset = arg1;
27756 else
27757 base = arg1, offset = arg2;
27759 gcc_assert (REG_P (base));
27761 /* Catch the case of <address> = <reg> + <reg> */
27762 if (REG_P (offset))
27764 int reg_offset = REGNO (offset);
27765 int reg_base = REGNO (base);
27766 int reg_dest = REGNO (operands[0]);
27768 /* Add the base and offset registers together into the
27769 higher destination register. */
27770 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27771 reg_dest + 1, reg_base, reg_offset);
27773 /* Load the lower destination register from the address in
27774 the higher destination register. */
27775 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27776 reg_dest, reg_dest + 1);
27778 /* Load the higher destination register from its own address
27779 plus 4. */
27780 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27781 reg_dest + 1, reg_dest + 1);
27783 else
27785 /* Compute <address> + 4 for the high order load. */
27786 operands[2] = adjust_address (operands[1], SImode, 4);
27788 /* If the computed address is held in the low order register
27789 then load the high order register first, otherwise always
27790 load the low order register first. */
27791 if (REGNO (operands[0]) == REGNO (base))
27793 output_asm_insn ("ldr\t%H0, %2", operands);
27794 output_asm_insn ("ldr\t%0, %1", operands);
27796 else
27798 output_asm_insn ("ldr\t%0, %1", operands);
27799 output_asm_insn ("ldr\t%H0, %2", operands);
27802 break;
27804 case LABEL_REF:
27805 /* With no registers to worry about we can just load the value
27806 directly. */
27807 operands[2] = adjust_address (operands[1], SImode, 4);
27809 output_asm_insn ("ldr\t%H0, %2", operands);
27810 output_asm_insn ("ldr\t%0, %1", operands);
27811 break;
27813 default:
27814 gcc_unreachable ();
27817 return "";
27820 const char *
27821 thumb_output_move_mem_multiple (int n, rtx *operands)
27823 rtx tmp;
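/* The exchanges below sort the scratch registers into ascending order, so
   that the ldmia/stmia register lists are ascending; the lowest-numbered
   register is transferred to/from the lowest address. */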
27825 switch (n)
27827 case 2:
27828 if (REGNO (operands[4]) > REGNO (operands[5]))
27830 tmp = operands[4];
27831 operands[4] = operands[5];
27832 operands[5] = tmp;
27834 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27835 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27836 break;
27838 case 3:
27839 if (REGNO (operands[4]) > REGNO (operands[5]))
27841 tmp = operands[4];
27842 operands[4] = operands[5];
27843 operands[5] = tmp;
27845 if (REGNO (operands[5]) > REGNO (operands[6]))
27847 tmp = operands[5];
27848 operands[5] = operands[6];
27849 operands[6] = tmp;
27851 if (REGNO (operands[4]) > REGNO (operands[5]))
27853 tmp = operands[4];
27854 operands[4] = operands[5];
27855 operands[5] = tmp;
27858 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27859 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27860 break;
27862 default:
27863 gcc_unreachable ();
27866 return "";
27869 /* Output a call-via instruction for thumb state. */
27870 const char *
27871 thumb_call_via_reg (rtx reg)
27873 int regno = REGNO (reg);
27874 rtx *labelp;
27876 gcc_assert (regno < LR_REGNUM);
27878 /* If we are in the normal text section we can use a single instance
27879 per compilation unit. If we are doing function sections, then we need
27880 an entry per section, since we can't rely on reachability. */
27881 if (in_section == text_section)
27883 thumb_call_reg_needed = 1;
27885 if (thumb_call_via_label[regno] == NULL)
27886 thumb_call_via_label[regno] = gen_label_rtx ();
27887 labelp = thumb_call_via_label + regno;
27889 else
27891 if (cfun->machine->call_via[regno] == NULL)
27892 cfun->machine->call_via[regno] = gen_label_rtx ();
27893 labelp = cfun->machine->call_via + regno;
27896 output_asm_insn ("bl\t%a0", labelp);
27897 return "";
27900 /* Routines for generating rtl. */
27901 void
27902 thumb_expand_movmemqi (rtx *operands)
27904 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27905 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27906 HOST_WIDE_INT len = INTVAL (operands[2]);
27907 HOST_WIDE_INT offset = 0;
27909 while (len >= 12)
27911 emit_insn (gen_movmem12b (out, in, out, in));
27912 len -= 12;
27915 if (len >= 8)
27917 emit_insn (gen_movmem8b (out, in, out, in));
27918 len -= 8;
27921 if (len >= 4)
27923 rtx reg = gen_reg_rtx (SImode);
27924 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27925 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27926 len -= 4;
27927 offset += 4;
27930 if (len >= 2)
27932 rtx reg = gen_reg_rtx (HImode);
27933 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27934 plus_constant (Pmode, in,
27935 offset))));
27936 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27937 offset)),
27938 reg));
27939 len -= 2;
27940 offset += 2;
27943 if (len)
27945 rtx reg = gen_reg_rtx (QImode);
27946 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27947 plus_constant (Pmode, in,
27948 offset))));
27949 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27950 offset)),
27951 reg));
27955 void
27956 thumb_reload_out_hi (rtx *operands)
27958 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27961 /* Handle reading a half-word from memory during reload. */
27962 void
27963 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
27965 gcc_unreachable ();
27968 /* Return the length of a function name prefix
27969 that starts with the character 'c'. */
27970 static int
27971 arm_get_strip_length (int c)
27973 switch (c)
27975 ARM_NAME_ENCODING_LENGTHS
27976 default: return 0;
27980 /* Return a pointer to a function's name with any
27981 and all prefix encodings stripped from it. */
27982 const char *
27983 arm_strip_name_encoding (const char *name)
27985 int skip;
27987 while ((skip = arm_get_strip_length (* name)))
27988 name += skip;
27990 return name;
27993 /* If there is a '*' anywhere in the name's prefix, then
27994 emit the stripped name verbatim, otherwise prepend an
27995 underscore if leading underscores are being used. */
27996 void
27997 arm_asm_output_labelref (FILE *stream, const char *name)
27999 int skip;
28000 int verbatim = 0;
28002 while ((skip = arm_get_strip_length (* name)))
28004 verbatim |= (*name == '*');
28005 name += skip;
28008 if (verbatim)
28009 fputs (name, stream);
28010 else
28011 asm_fprintf (stream, "%U%s", name);
28014 /* This function is used to emit an EABI tag and its associated value.
28015 We emit the numerical value of the tag in case the assembler does not
28016 support textual tags. (e.g. gas prior to 2.20). If requested we include
28017 the tag name in a comment so that anyone reading the assembler output
28018 will know which tag is being set.
28020 This function is not static because arm-c.c needs it too. */
28022 void
28023 arm_emit_eabi_attribute (const char *name, int num, int val)
28025 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
28026 if (flag_verbose_asm || flag_debug_asm)
28027 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
28028 asm_fprintf (asm_out_file, "\n");
28031 static void
28032 arm_file_start (void)
28034 int val;
28036 if (TARGET_UNIFIED_ASM)
28037 asm_fprintf (asm_out_file, "\t.syntax unified\n");
28039 if (TARGET_BPABI)
28041 const char *fpu_name;
28042 if (arm_selected_arch)
28044 /* armv7ve doesn't support any extensions. */
28045 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
28047 /* Keep backward compatibility for assemblers
28048 which don't support armv7ve. */
28049 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
28050 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
28051 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
28052 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
28053 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
28055 else
28057 const char* pos = strchr (arm_selected_arch->name, '+');
28058 if (pos)
28060 char buf[15];
28061 gcc_assert (strlen (arm_selected_arch->name)
28062 <= sizeof (buf) / sizeof (*pos));
28063 strncpy (buf, arm_selected_arch->name,
28064 (pos - arm_selected_arch->name) * sizeof (*pos));
28065 buf[pos - arm_selected_arch->name] = '\0';
28066 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
28067 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
28069 else
28070 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
28073 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
28074 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
28075 else
28077 const char* truncated_name
28078 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
28079 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
28082 if (TARGET_SOFT_FLOAT)
28084 fpu_name = "softvfp";
28086 else
28088 fpu_name = arm_fpu_desc->name;
28089 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
28091 if (TARGET_HARD_FLOAT)
28092 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
28093 if (TARGET_HARD_FLOAT_ABI)
28094 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
28097 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
28099 /* Some of these attributes only apply when the corresponding features
28100 are used. However we don't have any easy way of figuring this out.
28101 Conservatively record the setting that would have been used. */
28103 if (flag_rounding_math)
28104 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
28106 if (!flag_unsafe_math_optimizations)
28108 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
28109 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
28111 if (flag_signaling_nans)
28112 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
28114 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
28115 flag_finite_math_only ? 1 : 3);
28117 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
28118 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
28119 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
28120 flag_short_enums ? 1 : 2);
28122 /* Tag_ABI_optimization_goals. */
28123 if (optimize_size)
28124 val = 4;
28125 else if (optimize >= 2)
28126 val = 2;
28127 else if (optimize)
28128 val = 1;
28129 else
28130 val = 6;
28131 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28133 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28134 unaligned_access);
28136 if (arm_fp16_format)
28137 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28138 (int) arm_fp16_format);
28140 if (arm_lang_output_object_attributes_hook)
28141 arm_lang_output_object_attributes_hook();
28144 default_file_start ();
28147 static void
28148 arm_file_end (void)
28150 int regno;
28152 if (NEED_INDICATE_EXEC_STACK)
28153 /* Add .note.GNU-stack. */
28154 file_end_indicate_exec_stack ();
28156 if (! thumb_call_reg_needed)
28157 return;
28159 switch_to_section (text_section);
28160 asm_fprintf (asm_out_file, "\t.code 16\n");
28161 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28163 for (regno = 0; regno < LR_REGNUM; regno++)
28165 rtx label = thumb_call_via_label[regno];
28167 if (label != 0)
28169 targetm.asm_out.internal_label (asm_out_file, "L",
28170 CODE_LABEL_NUMBER (label));
28171 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28176 #ifndef ARM_PE
28177 /* Symbols in the text segment can be accessed without indirecting via the
28178 constant pool; it may take an extra binary operation, but this is still
28179 faster than indirecting via memory. Don't do this when not optimizing,
28180 since we won't be calculating all of the offsets necessary to do this
28181 simplification. */
28183 static void
28184 arm_encode_section_info (tree decl, rtx rtl, int first)
28186 if (optimize > 0 && TREE_CONSTANT (decl))
28187 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28189 default_encode_section_info (decl, rtl, first);
28191 #endif /* !ARM_PE */
28193 static void
28194 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28196 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28197 && !strcmp (prefix, "L"))
28199 arm_ccfsm_state = 0;
28200 arm_target_insn = NULL;
28202 default_internal_label (stream, prefix, labelno);
28205 /* Output code to add DELTA to the first argument, and then jump
28206 to FUNCTION. Used for C++ multiple inheritance. */
28207 static void
28208 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28209 HOST_WIDE_INT delta,
28210 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28211 tree function)
28213 static int thunk_label = 0;
28214 char label[256];
28215 char labelpc[256];
28216 int mi_delta = delta;
28217 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28218 int shift = 0;
28219 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28220 ? 1 : 0);
28221 if (mi_delta < 0)
28222 mi_delta = - mi_delta;
28224 final_start_function (emit_barrier (), file, 1);
28226 if (TARGET_THUMB1)
28228 int labelno = thunk_label++;
28229 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28230 /* Thunks are entered in ARM mode when available. */
28231 if (TARGET_THUMB1_ONLY)
28233 /* push r3 so we can use it as a temporary. */
28234 /* TODO: Omit this save if r3 is not used. */
28235 fputs ("\tpush {r3}\n", file);
28236 fputs ("\tldr\tr3, ", file);
28238 else
28240 fputs ("\tldr\tr12, ", file);
28242 assemble_name (file, label);
28243 fputc ('\n', file);
28244 if (flag_pic)
28246 /* If we are generating PIC, the ldr instruction below loads
28247 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28248 the address of the add + 8, so we have:
28250 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28251 = target + 1.
28253 Note that we have "+ 1" because some versions of GNU ld
28254 don't set the low bit of the result for R_ARM_REL32
28255 relocations against thumb function symbols.
28256 On ARMv6M this is +4, not +8. */
28257 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28258 assemble_name (file, labelpc);
28259 fputs (":\n", file);
28260 if (TARGET_THUMB1_ONLY)
28262 /* This is 2 insns after the start of the thunk, so we know it
28263 is 4-byte aligned. */
28264 fputs ("\tadd\tr3, pc, r3\n", file);
28265 fputs ("\tmov r12, r3\n", file);
28267 else
28268 fputs ("\tadd\tr12, pc, r12\n", file);
28270 else if (TARGET_THUMB1_ONLY)
28271 fputs ("\tmov r12, r3\n", file);
28273 if (TARGET_THUMB1_ONLY)
28275 if (mi_delta > 255)
28277 fputs ("\tldr\tr3, ", file);
28278 assemble_name (file, label);
28279 fputs ("+4\n", file);
28280 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28281 mi_op, this_regno, this_regno);
28283 else if (mi_delta != 0)
28285 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28286 mi_op, this_regno, this_regno,
28287 mi_delta);
28290 else
28292 /* TODO: Use movw/movt for large constants when available. */
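/* An ARM add/sub immediate is an 8-bit value rotated by an even amount, so
   peel the delta off in byte-sized chunks at even bit positions. */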
28293 while (mi_delta != 0)
28295 if ((mi_delta & (3 << shift)) == 0)
28296 shift += 2;
28297 else
28299 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28300 mi_op, this_regno, this_regno,
28301 mi_delta & (0xff << shift));
28302 mi_delta &= ~(0xff << shift);
28303 shift += 8;
28307 if (TARGET_THUMB1)
28309 if (TARGET_THUMB1_ONLY)
28310 fputs ("\tpop\t{r3}\n", file);
28312 fprintf (file, "\tbx\tr12\n");
28313 ASM_OUTPUT_ALIGN (file, 2);
28314 assemble_name (file, label);
28315 fputs (":\n", file);
28316 if (flag_pic)
28318 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28319 rtx tem = XEXP (DECL_RTL (function), 0);
28320 tem = plus_constant (GET_MODE (tem), tem, -7);
28321 tem = gen_rtx_MINUS (GET_MODE (tem),
28322 tem,
28323 gen_rtx_SYMBOL_REF (Pmode,
28324 ggc_strdup (labelpc)));
28325 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28327 else
28328 /* Output ".word .LTHUNKn". */
28329 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28331 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28332 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28334 else
28336 fputs ("\tb\t", file);
28337 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28338 if (NEED_PLT_RELOC)
28339 fputs ("(PLT)", file);
28340 fputc ('\n', file);
28343 final_end_function ();
28347 arm_emit_vector_const (FILE *file, rtx x)
28349 int i;
28350 const char * pattern;
28352 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28354 switch (GET_MODE (x))
28356 case V2SImode: pattern = "%08x"; break;
28357 case V4HImode: pattern = "%04x"; break;
28358 case V8QImode: pattern = "%02x"; break;
28359 default: gcc_unreachable ();
28362 fprintf (file, "0x");
28363 for (i = CONST_VECTOR_NUNITS (x); i--;)
28365 rtx element;
28367 element = CONST_VECTOR_ELT (x, i);
28368 fprintf (file, pattern, INTVAL (element));
28371 return 1;
28374 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
28375 HFmode constant pool entries are actually loaded with ldr. */
28376 void
28377 arm_emit_fp16_const (rtx c)
28379 REAL_VALUE_TYPE r;
28380 long bits;
28382 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28383 bits = real_to_target (NULL, &r, HFmode);
28384 if (WORDS_BIG_ENDIAN)
28385 assemble_zeros (2);
28386 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28387 if (!WORDS_BIG_ENDIAN)
28388 assemble_zeros (2);
28391 const char *
28392 arm_output_load_gr (rtx *operands)
28394 rtx reg;
28395 rtx offset;
28396 rtx wcgr;
28397 rtx sum;
28399 if (!MEM_P (operands [1])
28400 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28401 || !REG_P (reg = XEXP (sum, 0))
28402 || !CONST_INT_P (offset = XEXP (sum, 1))
28403 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28404 return "wldrw%?\t%0, %1";
28406 /* Fix up an out-of-range load of a GR register. */
28407 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28408 wcgr = operands[0];
28409 operands[0] = reg;
28410 output_asm_insn ("ldr%?\t%0, %1", operands);
28412 operands[0] = wcgr;
28413 operands[1] = reg;
28414 output_asm_insn ("tmcr%?\t%0, %1", operands);
28415 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28417 return "";
28420 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28422 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28423 named arg and all anonymous args onto the stack.
28424 XXX I know the prologue shouldn't be pushing registers, but it is faster
28425 that way. */
28427 static void
28428 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28429 enum machine_mode mode,
28430 tree type,
28431 int *pretend_size,
28432 int second_time ATTRIBUTE_UNUSED)
28434 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28435 int nregs;
28437 cfun->machine->uses_anonymous_args = 1;
28438 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28440 nregs = pcum->aapcs_ncrn;
28441 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28442 nregs++;
28444 else
28445 nregs = pcum->nregs;
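/* Each argument register not yet used (up to r3) contributes one word of
   pretend stack, so the prologue can push the anonymous argument registers
   contiguously below the caller's stacked arguments. */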
28447 if (nregs < NUM_ARG_REGS)
28448 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28451 /* We can't rely on the caller doing the proper promotion when
28452 using APCS or ATPCS. */
28454 static bool
28455 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28457 return !TARGET_AAPCS_BASED;
28460 static enum machine_mode
28461 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28462 enum machine_mode mode,
28463 int *punsignedp ATTRIBUTE_UNUSED,
28464 const_tree fntype ATTRIBUTE_UNUSED,
28465 int for_return ATTRIBUTE_UNUSED)
28467 if (GET_MODE_CLASS (mode) == MODE_INT
28468 && GET_MODE_SIZE (mode) < 4)
28469 return SImode;
28471 return mode;
28474 /* AAPCS based ABIs use short enums by default. */
28476 static bool
28477 arm_default_short_enums (void)
28479 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28483 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28485 static bool
28486 arm_align_anon_bitfield (void)
28488 return TARGET_AAPCS_BASED;
28492 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28494 static tree
28495 arm_cxx_guard_type (void)
28497 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28501 /* The EABI says test the least significant bit of a guard variable. */
28503 static bool
28504 arm_cxx_guard_mask_bit (void)
28506 return TARGET_AAPCS_BASED;
28510 /* The EABI specifies that all array cookies are 8 bytes long. */
28512 static tree
28513 arm_get_cookie_size (tree type)
28515 tree size;
28517 if (!TARGET_AAPCS_BASED)
28518 return default_cxx_get_cookie_size (type);
28520 size = build_int_cst (sizetype, 8);
28521 return size;
28525 /* The EABI says that array cookies should also contain the element size. */
28527 static bool
28528 arm_cookie_has_size (void)
28530 return TARGET_AAPCS_BASED;
28534 /* The EABI says constructors and destructors should return a pointer to
28535 the object constructed/destroyed. */
28537 static bool
28538 arm_cxx_cdtor_returns_this (void)
28540 return TARGET_AAPCS_BASED;
28543 /* The EABI says that an inline function may never be the key
28544 method. */
28546 static bool
28547 arm_cxx_key_method_may_be_inline (void)
28549 return !TARGET_AAPCS_BASED;
28552 static void
28553 arm_cxx_determine_class_data_visibility (tree decl)
28555 if (!TARGET_AAPCS_BASED
28556 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28557 return;
28559 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28560 is exported. However, on systems without dynamic vague linkage,
28561 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28562 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28563 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28564 else
28565 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28566 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28569 static bool
28570 arm_cxx_class_data_always_comdat (void)
28572 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28573 vague linkage if the class has no key function. */
28574 return !TARGET_AAPCS_BASED;
28578 /* The EABI says __aeabi_atexit should be used to register static
28579 destructors. */
28581 static bool
28582 arm_cxx_use_aeabi_atexit (void)
28584 return TARGET_AAPCS_BASED;
28588 void
28589 arm_set_return_address (rtx source, rtx scratch)
28591 arm_stack_offsets *offsets;
28592 HOST_WIDE_INT delta;
28593 rtx addr;
28594 unsigned long saved_regs;
28596 offsets = arm_get_frame_offsets ();
28597 saved_regs = offsets->saved_regs_mask;
28599 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28600 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28601 else
28603 if (frame_pointer_needed)
28604 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28605 else
28607 /* LR will be the first saved register. */
28608 delta = offsets->outgoing_args - (offsets->frame + 4);
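/* An ARM word load/store offset is limited to +/-4095, so for larger
   displacements put the 4K-aligned part into the scratch register and keep
   only the remainder as the addressing offset. */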
28611 if (delta >= 4096)
28613 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28614 GEN_INT (delta & ~4095)));
28615 addr = scratch;
28616 delta &= 4095;
28618 else
28619 addr = stack_pointer_rtx;
28621 addr = plus_constant (Pmode, addr, delta);
28623 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28628 void
28629 thumb_set_return_address (rtx source, rtx scratch)
28631 arm_stack_offsets *offsets;
28632 HOST_WIDE_INT delta;
28633 HOST_WIDE_INT limit;
28634 int reg;
28635 rtx addr;
28636 unsigned long mask;
28638 emit_use (source);
28640 offsets = arm_get_frame_offsets ();
28641 mask = offsets->saved_regs_mask;
28642 if (mask & (1 << LR_REGNUM))
28644 limit = 1024;
28645 /* Find the saved regs. */
28646 if (frame_pointer_needed)
28648 delta = offsets->soft_frame - offsets->saved_args;
28649 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28650 if (TARGET_THUMB1)
28651 limit = 128;
28653 else
28655 delta = offsets->outgoing_args - offsets->saved_args;
28656 reg = SP_REGNUM;
28658 /* Allow for the stack frame. */
28659 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28660 delta -= 16;
28661 /* The link register is always the first saved register. */
28662 delta -= 4;
28664 /* Construct the address. */
28665 addr = gen_rtx_REG (SImode, reg);
28666 if (delta > limit)
28668 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28669 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28670 addr = scratch;
28672 else
28673 addr = plus_constant (Pmode, addr, delta);
28675 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28677 else
28678 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28681 /* Implements target hook vector_mode_supported_p. */
28682 bool
28683 arm_vector_mode_supported_p (enum machine_mode mode)
28685 /* Neon also supports V2SImode, etc. listed in the clause below. */
28686 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28687 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28688 return true;
28690 if ((TARGET_NEON || TARGET_IWMMXT)
28691 && ((mode == V2SImode)
28692 || (mode == V4HImode)
28693 || (mode == V8QImode)))
28694 return true;
28696 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28697 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28698 || mode == V2HAmode))
28699 return true;
28701 return false;
28704 /* Implements target hook array_mode_supported_p. */
28706 static bool
28707 arm_array_mode_supported_p (enum machine_mode mode,
28708 unsigned HOST_WIDE_INT nelems)
28710 if (TARGET_NEON
28711 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28712 && (nelems >= 2 && nelems <= 4))
28713 return true;
28715 return false;
28718 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28719 registers when autovectorizing for Neon, at least until multiple vector
28720 widths are supported properly by the middle-end. */
28722 static enum machine_mode
28723 arm_preferred_simd_mode (enum machine_mode mode)
28725 if (TARGET_NEON)
28726 switch (mode)
28728 case SFmode:
28729 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28730 case SImode:
28731 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28732 case HImode:
28733 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28734 case QImode:
28735 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28736 case DImode:
28737 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28738 return V2DImode;
28739 break;
28741 default:;
28744 if (TARGET_REALLY_IWMMXT)
28745 switch (mode)
28747 case SImode:
28748 return V2SImode;
28749 case HImode:
28750 return V4HImode;
28751 case QImode:
28752 return V8QImode;
28754 default:;
28757 return word_mode;
28760 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28762 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28763 using r0-r4 for function arguments, r7 for the stack frame, and not having
28764 enough left over to do doubleword arithmetic. For Thumb-2 all the
28765 potentially problematic instructions accept high registers so this is not
28766 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28767 that require many low registers. */
28768 static bool
28769 arm_class_likely_spilled_p (reg_class_t rclass)
28771 if ((TARGET_THUMB1 && rclass == LO_REGS)
28772 || rclass == CC_REG)
28773 return true;
28775 return false;
28778 /* Implements target hook small_register_classes_for_mode_p. */
28779 bool
28780 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
28782 return TARGET_THUMB1;
28785 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28786 ARM insns and therefore guarantee that the shift count is modulo 256.
28787 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28788 guarantee no particular behavior for out-of-range counts. */
28790 static unsigned HOST_WIDE_INT
28791 arm_shift_truncation_mask (enum machine_mode mode)
28793 return mode == SImode ? 255 : 0;
28797 /* Map internal gcc register numbers to DWARF2 register numbers. */
28799 unsigned int
28800 arm_dbx_register_number (unsigned int regno)
28802 if (regno < 16)
28803 return regno;
28805 if (IS_VFP_REGNUM (regno))
28807 /* See comment in arm_dwarf_register_span. */
28808 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28809 return 64 + regno - FIRST_VFP_REGNUM;
28810 else
28811 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28814 if (IS_IWMMXT_GR_REGNUM (regno))
28815 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28817 if (IS_IWMMXT_REGNUM (regno))
28818 return 112 + regno - FIRST_IWMMXT_REGNUM;
28820 gcc_unreachable ();
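/* A few illustrative mappings implied by the code above: core register r7
   stays 7; the first single-precision VFP register maps to DWARF number 64;
   a double-precision register outside the single-precision range lands in
   the 256-287 block (for d16 this works out to 256 + 16 = 272).  */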
28823 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28824 GCC models them as 64 32-bit registers, so we need to describe this to
28825 the DWARF generation code. Other registers can use the default. */
28826 static rtx
28827 arm_dwarf_register_span (rtx rtl)
28829 enum machine_mode mode;
28830 unsigned regno;
28831 rtx parts[16];
28832 int nregs;
28833 int i;
28835 regno = REGNO (rtl);
28836 if (!IS_VFP_REGNUM (regno))
28837 return NULL_RTX;
28839 /* XXX FIXME: The EABI defines two VFP register ranges:
28840 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28841 256-287: D0-D31
28842 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28843 corresponding D register. Until GDB supports this, we shall use the
28844 legacy encodings. We also use these encodings for D0-D15 for
28845 compatibility with older debuggers. */
28846 mode = GET_MODE (rtl);
28847 if (GET_MODE_SIZE (mode) < 8)
28848 return NULL_RTX;
28850 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28852 nregs = GET_MODE_SIZE (mode) / 4;
28853 for (i = 0; i < nregs; i += 2)
28854 if (TARGET_BIG_END)
28856 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28857 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28859 else
28861 parts[i] = gen_rtx_REG (SImode, regno + i);
28862 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28865 else
28867 nregs = GET_MODE_SIZE (mode) / 8;
28868 for (i = 0; i < nregs; i++)
28869 parts[i] = gen_rtx_REG (DImode, regno + i);
28872 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
28875 #if ARM_UNWIND_INFO
28876 /* Emit unwind directives for a store-multiple instruction or stack pointer
28877 push during alignment.
28878 These should only ever be generated by the function prologue code, so
28879 expect them to have a particular form.
28880 The store-multiple instruction sometimes pushes pc as the last register,
28881 although it should not be tracked in the unwind information; for -Os it
28882 sometimes pushes some dummy registers before the first register that needs
28883 to be tracked in unwind information.  Such dummy registers are there just
28884 to avoid separate stack adjustment, and will not be restored in the
28885 epilogue. */
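/* As a sketch of the expected output: a prologue "push {r4, r5, lr}" is
   annotated as "\t.save {r4, r5, lr}", a VFP store-multiple of d8-d9 as
   "\t.vsave {d8, d9}", and a trailing pc push or leading dummy registers
   become an additional "\t.pad" directive.  */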
28887 static void
28888 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
28890 int i;
28891 HOST_WIDE_INT offset;
28892 HOST_WIDE_INT nregs;
28893 int reg_size;
28894 unsigned reg;
28895 unsigned lastreg;
28896 unsigned padfirst = 0, padlast = 0;
28897 rtx e;
28899 e = XVECEXP (p, 0, 0);
28900 gcc_assert (GET_CODE (e) == SET);
28902 /* First insn will adjust the stack pointer. */
28903 gcc_assert (GET_CODE (e) == SET
28904 && REG_P (SET_DEST (e))
28905 && REGNO (SET_DEST (e)) == SP_REGNUM
28906 && GET_CODE (SET_SRC (e)) == PLUS);
28908 offset = -INTVAL (XEXP (SET_SRC (e), 1));
28909 nregs = XVECLEN (p, 0) - 1;
28910 gcc_assert (nregs);
28912 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
28913 if (reg < 16)
28915 /* For -Os dummy registers can be pushed at the beginning to
28916 avoid separate stack pointer adjustment. */
28917 e = XVECEXP (p, 0, 1);
28918 e = XEXP (SET_DEST (e), 0);
28919 if (GET_CODE (e) == PLUS)
28920 padfirst = INTVAL (XEXP (e, 1));
28921 gcc_assert (padfirst == 0 || optimize_size);
28922 /* The function prologue may also push pc, but not annotate it as it is
28923 never restored. We turn this into a stack pointer adjustment. */
28924 e = XVECEXP (p, 0, nregs);
28925 e = XEXP (SET_DEST (e), 0);
28926 if (GET_CODE (e) == PLUS)
28927 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
28928 else
28929 padlast = offset - 4;
28930 gcc_assert (padlast == 0 || padlast == 4);
28931 if (padlast == 4)
28932 fprintf (asm_out_file, "\t.pad #4\n");
28933 reg_size = 4;
28934 fprintf (asm_out_file, "\t.save {");
28936 else if (IS_VFP_REGNUM (reg))
28938 reg_size = 8;
28939 fprintf (asm_out_file, "\t.vsave {");
28941 else
28942 /* Unknown register type. */
28943 gcc_unreachable ();
28945 /* If the stack increment doesn't match the size of the saved registers,
28946 something has gone horribly wrong. */
28947 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
28949 offset = padfirst;
28950 lastreg = 0;
28951 /* The remaining insns will describe the stores. */
28952 for (i = 1; i <= nregs; i++)
28954 /* Expect (set (mem <addr>) (reg)).
28955 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28956 e = XVECEXP (p, 0, i);
28957 gcc_assert (GET_CODE (e) == SET
28958 && MEM_P (SET_DEST (e))
28959 && REG_P (SET_SRC (e)));
28961 reg = REGNO (SET_SRC (e));
28962 gcc_assert (reg >= lastreg);
28964 if (i != 1)
28965 fprintf (asm_out_file, ", ");
28966 /* We can't use %r for vfp because we need to use the
28967 double precision register names. */
28968 if (IS_VFP_REGNUM (reg))
28969 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
28970 else
28971 asm_fprintf (asm_out_file, "%r", reg);
28973 #ifdef ENABLE_CHECKING
28974 /* Check that the addresses are consecutive. */
28975 e = XEXP (SET_DEST (e), 0);
28976 if (GET_CODE (e) == PLUS)
28977 gcc_assert (REG_P (XEXP (e, 0))
28978 && REGNO (XEXP (e, 0)) == SP_REGNUM
28979 && CONST_INT_P (XEXP (e, 1))
28980 && offset == INTVAL (XEXP (e, 1)));
28981 else
28982 gcc_assert (i == 1
28983 && REG_P (e)
28984 && REGNO (e) == SP_REGNUM);
28985 offset += reg_size;
28986 #endif
28988 fprintf (asm_out_file, "}\n");
28989 if (padfirst)
28990 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
28993 /* Emit unwind directives for a SET. */
28995 static void
28996 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
28998 rtx e0;
28999 rtx e1;
29000 unsigned reg;
29002 e0 = XEXP (p, 0);
29003 e1 = XEXP (p, 1);
29004 switch (GET_CODE (e0))
29006 case MEM:
29007 /* Pushing a single register. */
29008 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
29009 || !REG_P (XEXP (XEXP (e0, 0), 0))
29010 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
29011 abort ();
29013 asm_fprintf (asm_out_file, "\t.save ");
29014 if (IS_VFP_REGNUM (REGNO (e1)))
29015 asm_fprintf(asm_out_file, "{d%d}\n",
29016 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
29017 else
29018 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
29019 break;
29021 case REG:
29022 if (REGNO (e0) == SP_REGNUM)
29024 /* A stack increment. */
29025 if (GET_CODE (e1) != PLUS
29026 || !REG_P (XEXP (e1, 0))
29027 || REGNO (XEXP (e1, 0)) != SP_REGNUM
29028 || !CONST_INT_P (XEXP (e1, 1)))
29029 abort ();
29031 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
29032 -INTVAL (XEXP (e1, 1)));
29034 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
29036 HOST_WIDE_INT offset;
29038 if (GET_CODE (e1) == PLUS)
29040 if (!REG_P (XEXP (e1, 0))
29041 || !CONST_INT_P (XEXP (e1, 1)))
29042 abort ();
29043 reg = REGNO (XEXP (e1, 0));
29044 offset = INTVAL (XEXP (e1, 1));
29045 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
29046 HARD_FRAME_POINTER_REGNUM, reg,
29047 offset);
29049 else if (REG_P (e1))
29051 reg = REGNO (e1);
29052 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
29053 HARD_FRAME_POINTER_REGNUM, reg);
29055 else
29056 abort ();
29058 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
29060 /* Move from sp to reg. */
29061 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
29063 else if (GET_CODE (e1) == PLUS
29064 && REG_P (XEXP (e1, 0))
29065 && REGNO (XEXP (e1, 0)) == SP_REGNUM
29066 && CONST_INT_P (XEXP (e1, 1)))
29068 /* Set reg to offset from sp. */
29069 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
29070 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
29072 else
29073 abort ();
29074 break;
29076 default:
29077 abort ();
29082 /* Emit unwind directives for the given insn. */
29084 static void
29085 arm_unwind_emit (FILE * asm_out_file, rtx insn)
29087 rtx note, pat;
29088 bool handled_one = false;
29090 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29091 return;
29093 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29094 && (TREE_NOTHROW (current_function_decl)
29095 || crtl->all_throwers_are_sibcalls))
29096 return;
29098 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
29099 return;
29101 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
29103 switch (REG_NOTE_KIND (note))
29105 case REG_FRAME_RELATED_EXPR:
29106 pat = XEXP (note, 0);
29107 goto found;
29109 case REG_CFA_REGISTER:
29110 pat = XEXP (note, 0);
29111 if (pat == NULL)
29113 pat = PATTERN (insn);
29114 if (GET_CODE (pat) == PARALLEL)
29115 pat = XVECEXP (pat, 0, 0);
29118 /* Only emitted for IS_STACKALIGN re-alignment. */
29120 rtx dest, src;
29121 unsigned reg;
29123 src = SET_SRC (pat);
29124 dest = SET_DEST (pat);
29126 gcc_assert (src == stack_pointer_rtx);
29127 reg = REGNO (dest);
29128 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29129 reg + 0x90, reg);
29131 handled_one = true;
29132 break;
29134 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
29135    to get correct DWARF information for shrink-wrapping.  We should not
29136    emit unwind information for it because such notes are used either for
29137    pretend arguments or to adjust sp and restore registers from the
29138    stack. */
29139 case REG_CFA_DEF_CFA:
29140 case REG_CFA_ADJUST_CFA:
29141 case REG_CFA_RESTORE:
29142 return;
29144 case REG_CFA_EXPRESSION:
29145 case REG_CFA_OFFSET:
29146 /* ??? Only handling here what we actually emit. */
29147 gcc_unreachable ();
29149 default:
29150 break;
29153 if (handled_one)
29154 return;
29155 pat = PATTERN (insn);
29156 found:
29158 switch (GET_CODE (pat))
29160 case SET:
29161 arm_unwind_emit_set (asm_out_file, pat);
29162 break;
29164 case SEQUENCE:
29165 /* Store multiple. */
29166 arm_unwind_emit_sequence (asm_out_file, pat);
29167 break;
29169 default:
29170 abort();
29175 /* Output a reference from a function exception table to the type_info
29176 object X. The EABI specifies that the symbol should be relocated by
29177 an R_ARM_TARGET2 relocation. */
29179 static bool
29180 arm_output_ttype (rtx x)
29182 fputs ("\t.word\t", asm_out_file);
29183 output_addr_const (asm_out_file, x);
29184 /* Use special relocations for symbol references. */
29185 if (!CONST_INT_P (x))
29186 fputs ("(TARGET2)", asm_out_file);
29187 fputc ('\n', asm_out_file);
29189 return TRUE;
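/* For instance, a catch clause for 'int' would typically produce
   "\t.word\t_ZTIi(TARGET2)" here, leaving the typeinfo address to be
   resolved through the R_ARM_TARGET2 relocation.  */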
29192 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29194 static void
29195 arm_asm_emit_except_personality (rtx personality)
29197 fputs ("\t.personality\t", asm_out_file);
29198 output_addr_const (asm_out_file, personality);
29199 fputc ('\n', asm_out_file);
29202 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29204 static void
29205 arm_asm_init_sections (void)
29207 exception_section = get_unnamed_section (0, output_section_asm_op,
29208 "\t.handlerdata");
29210 #endif /* ARM_UNWIND_INFO */
29212 /* Output unwind directives for the start/end of a function. */
29214 void
29215 arm_output_fn_unwind (FILE * f, bool prologue)
29217 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29218 return;
29220 if (prologue)
29221 fputs ("\t.fnstart\n", f);
29222 else
29224 /* If this function will never be unwound, then mark it as such.
29225 The same condition is used in arm_unwind_emit to suppress
29226 the frame annotations. */
29227 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29228 && (TREE_NOTHROW (current_function_decl)
29229 || crtl->all_throwers_are_sibcalls))
29230 fputs("\t.cantunwind\n", f);
29232 fputs ("\t.fnend\n", f);
29236 static bool
29237 arm_emit_tls_decoration (FILE *fp, rtx x)
29239 enum tls_reloc reloc;
29240 rtx val;
29242 val = XVECEXP (x, 0, 0);
29243 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29245 output_addr_const (fp, val);
29247 switch (reloc)
29249 case TLS_GD32:
29250 fputs ("(tlsgd)", fp);
29251 break;
29252 case TLS_LDM32:
29253 fputs ("(tlsldm)", fp);
29254 break;
29255 case TLS_LDO32:
29256 fputs ("(tlsldo)", fp);
29257 break;
29258 case TLS_IE32:
29259 fputs ("(gottpoff)", fp);
29260 break;
29261 case TLS_LE32:
29262 fputs ("(tpoff)", fp);
29263 break;
29264 case TLS_DESCSEQ:
29265 fputs ("(tlsdesc)", fp);
29266 break;
29267 default:
29268 gcc_unreachable ();
29271 switch (reloc)
29273 case TLS_GD32:
29274 case TLS_LDM32:
29275 case TLS_IE32:
29276 case TLS_DESCSEQ:
29277 fputs (" + (. - ", fp);
29278 output_addr_const (fp, XVECEXP (x, 0, 2));
29279 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29280 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29281 output_addr_const (fp, XVECEXP (x, 0, 3));
29282 fputc (')', fp);
29283 break;
29284 default:
29285 break;
29288 return TRUE;
29291 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29293 static void
29294 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29296 gcc_assert (size == 4);
29297 fputs ("\t.word\t", file);
29298 output_addr_const (file, x);
29299 fputs ("(tlsldo)", file);
29302 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29304 static bool
29305 arm_output_addr_const_extra (FILE *fp, rtx x)
29307 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29308 return arm_emit_tls_decoration (fp, x);
29309 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29311 char label[256];
29312 int labelno = INTVAL (XVECEXP (x, 0, 0));
29314 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29315 assemble_name_raw (fp, label);
29317 return TRUE;
29319 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29321 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29322 if (GOT_PCREL)
29323 fputs ("+.", fp);
29324 fputs ("-(", fp);
29325 output_addr_const (fp, XVECEXP (x, 0, 0));
29326 fputc (')', fp);
29327 return TRUE;
29329 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29331 output_addr_const (fp, XVECEXP (x, 0, 0));
29332 if (GOT_PCREL)
29333 fputs ("+.", fp);
29334 fputs ("-(", fp);
29335 output_addr_const (fp, XVECEXP (x, 0, 1));
29336 fputc (')', fp);
29337 return TRUE;
29339 else if (GET_CODE (x) == CONST_VECTOR)
29340 return arm_emit_vector_const (fp, x);
29342 return FALSE;
29345 /* Output assembly for a shift instruction.
29346 SET_FLAGS determines how the instruction modifies the condition codes.
29347 0 - Do not set condition codes.
29348 1 - Set condition codes.
29349 2 - Use smallest instruction. */
29350 const char *
29351 arm_output_shift(rtx * operands, int set_flags)
29353 char pattern[100];
29354 static const char flag_chars[3] = {'?', '.', '!'};
29355 const char *shift;
29356 HOST_WIDE_INT val;
29357 char c;
29359 c = flag_chars[set_flags];
29360 if (TARGET_UNIFIED_ASM)
29362 shift = shift_op(operands[3], &val);
29363 if (shift)
29365 if (val != -1)
29366 operands[2] = GEN_INT(val);
29367 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29369 else
29370 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29372 else
29373 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29374 output_asm_insn (pattern, operands);
29375 return "";
29378 /* Output assembly for a WMMX immediate shift instruction. */
29379 const char *
29380 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29382 int shift = INTVAL (operands[2]);
29383 char templ[50];
29384 enum machine_mode opmode = GET_MODE (operands[0]);
29386 gcc_assert (shift >= 0);
29388   /* If the shift value in the register versions is > 63 (for the D qualifier),
29389      31 (for the W qualifier) or 15 (for the H qualifier), handle it specially here.  */
29390 if (((opmode == V4HImode) && (shift > 15))
29391 || ((opmode == V2SImode) && (shift > 31))
29392 || ((opmode == DImode) && (shift > 63)))
29394 if (wror_or_wsra)
29396 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29397 output_asm_insn (templ, operands);
29398 if (opmode == DImode)
29400 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29401 output_asm_insn (templ, operands);
29404 else
29406 /* The destination register will contain all zeros. */
29407 sprintf (templ, "wzero\t%%0");
29408 output_asm_insn (templ, operands);
29410 return "";
29413 if ((opmode == DImode) && (shift > 32))
29415 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29416 output_asm_insn (templ, operands);
29417 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29418 output_asm_insn (templ, operands);
29420 else
29422 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29423 output_asm_insn (templ, operands);
29425 return "";
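/* Example of the splitting above: a DImode shift by 40 cannot be encoded in
   one instruction, so it is emitted as a shift by 32 followed by a shift by
   8 on the same destination.  For counts beyond the element width, rotates
   and arithmetic right shifts (wror_or_wsra) fall back to 32-bit steps,
   while other shifts simply wzero the destination.  */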
29428 /* Output assembly for a WMMX tinsr instruction. */
29429 const char *
29430 arm_output_iwmmxt_tinsr (rtx *operands)
29432 int mask = INTVAL (operands[3]);
29433 int i;
29434 char templ[50];
29435 int units = mode_nunits[GET_MODE (operands[0])];
29436 gcc_assert ((mask & (mask - 1)) == 0);
29437 for (i = 0; i < units; ++i)
29439 if ((mask & 0x01) == 1)
29441 break;
29443 mask >>= 1;
29445 gcc_assert (i < units);
29447 switch (GET_MODE (operands[0]))
29449 case V8QImode:
29450 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29451 break;
29452 case V4HImode:
29453 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29454 break;
29455 case V2SImode:
29456 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29457 break;
29458 default:
29459 gcc_unreachable ();
29460 break;
29462 output_asm_insn (templ, operands);
29464 return "";
29467 /* Output a Thumb-1 casesi dispatch sequence. */
29468 const char *
29469 thumb1_output_casesi (rtx *operands)
29471 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29473 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29475 switch (GET_MODE(diff_vec))
29477 case QImode:
29478 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29479 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29480 case HImode:
29481 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29482 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29483 case SImode:
29484 return "bl\t%___gnu_thumb1_case_si";
29485 default:
29486 gcc_unreachable ();
29490 /* Output a Thumb-2 casesi instruction. */
29491 const char *
29492 thumb2_output_casesi (rtx *operands)
29494 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29496 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29498 output_asm_insn ("cmp\t%0, %1", operands);
29499 output_asm_insn ("bhi\t%l3", operands);
29500 switch (GET_MODE(diff_vec))
29502 case QImode:
29503 return "tbb\t[%|pc, %0]";
29504 case HImode:
29505 return "tbh\t[%|pc, %0, lsl #1]";
29506 case SImode:
29507 if (flag_pic)
29509 output_asm_insn ("adr\t%4, %l2", operands);
29510 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29511 output_asm_insn ("add\t%4, %4, %5", operands);
29512 return "bx\t%4";
29514 else
29516 output_asm_insn ("adr\t%4, %l2", operands);
29517 return "ldr\t%|pc, [%4, %0, lsl #2]";
29519 default:
29520 gcc_unreachable ();
29524 /* Most ARM cores are single issue, but some newer ones can issue two or
29525    three instructions per cycle.  The scheduler descriptions rely on this being correct. */
29526 static int
29527 arm_issue_rate (void)
29529 switch (arm_tune)
29531 case cortexa15:
29532 case cortexa57:
29533 return 3;
29535 case cortexr4:
29536 case cortexr4f:
29537 case cortexr5:
29538 case genericv7a:
29539 case cortexa5:
29540 case cortexa7:
29541 case cortexa8:
29542 case cortexa9:
29543 case cortexa12:
29544 case cortexa53:
29545 case fa726te:
29546 case marvell_pj4:
29547 return 2;
29549 default:
29550 return 1;
29554 /* A table and a function to perform ARM-specific name mangling for
29555 NEON vector types in order to conform to the AAPCS (see "Procedure
29556 Call Standard for the ARM Architecture", Appendix A). To qualify
29557 for emission with the mangled names defined in that document, a
29558 vector type must not only be of the correct mode but also be
29559 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29560 typedef struct
29562 enum machine_mode mode;
29563 const char *element_type_name;
29564 const char *aapcs_name;
29565 } arm_mangle_map_entry;
29567 static arm_mangle_map_entry arm_mangle_map[] = {
29568 /* 64-bit containerized types. */
29569 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29570 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29571 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29572 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29573 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29574 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29575 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29576 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29577 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29578 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29580 /* 128-bit containerized types. */
29581 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29582 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29583 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29584 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29585 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29586 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29587 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29588 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29589 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29590 { VOIDmode, NULL, NULL }
29593 const char *
29594 arm_mangle_type (const_tree type)
29596 arm_mangle_map_entry *pos = arm_mangle_map;
29598 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29599 has to be mangled as if it is in the "std" namespace. */
29600 if (TARGET_AAPCS_BASED
29601 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29602 return "St9__va_list";
29604 /* Half-precision float. */
29605 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29606 return "Dh";
29608 if (TREE_CODE (type) != VECTOR_TYPE)
29609 return NULL;
29611 /* Check the mode of the vector type, and the name of the vector
29612 element type, against the table. */
29613 while (pos->mode != VOIDmode)
29615 tree elt_type = TREE_TYPE (type);
29617 if (pos->mode == TYPE_MODE (type)
29618 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29619 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29620 pos->element_type_name))
29621 return pos->aapcs_name;
29623 pos++;
29626 /* Use the default mangling for unrecognized (possibly user-defined)
29627 vector types. */
29628 return NULL;
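/* A worked example of the table lookup above: the NEON type int32x4_t has
   mode V4SImode and element type __builtin_neon_si, so it mangles to
   "17__simd128_int32_t"; a function "void f (int32x4_t)" therefore mangles
   to something like "_Z1f17__simd128_int32_t".  */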
29631 /* Order of allocation of core registers for Thumb: this allocation is
29632 written over the corresponding initial entries of the array
29633 initialized with REG_ALLOC_ORDER. We allocate all low registers
29634 first. Saving and restoring a low register is usually cheaper than
29635 using a call-clobbered high register. */
29637 static const int thumb_core_reg_alloc_order[] =
29639 3, 2, 1, 0, 4, 5, 6, 7,
29640 14, 12, 8, 9, 10, 11
29643 /* Adjust register allocation order when compiling for Thumb. */
29645 void
29646 arm_order_regs_for_local_alloc (void)
29648 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29649 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29650 if (TARGET_THUMB)
29651 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29652 sizeof (thumb_core_reg_alloc_order));
29655 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29657 bool
29658 arm_frame_pointer_required (void)
29660 return (cfun->has_nonlocal_label
29661 || SUBTARGET_FRAME_POINTER_REQUIRED
29662 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29665 /* Only Thumb-1 lacks support for conditional execution, so return true if
29666    the target is not Thumb-1. */
29667 static bool
29668 arm_have_conditional_execution (void)
29670 return !TARGET_THUMB1;
29673 tree
29674 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29676 enum machine_mode in_mode, out_mode;
29677 int in_n, out_n;
29679 if (TREE_CODE (type_out) != VECTOR_TYPE
29680 || TREE_CODE (type_in) != VECTOR_TYPE
29681 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
29682 return NULL_TREE;
29684 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29685 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29686 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29687 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29689 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29690 decl of the vectorized builtin for the appropriate vector mode.
29691 NULL_TREE is returned if no such builtin is available. */
29692 #undef ARM_CHECK_BUILTIN_MODE
29693 #define ARM_CHECK_BUILTIN_MODE(C) \
29694 (out_mode == SFmode && out_n == C \
29695 && in_mode == SFmode && in_n == C)
29697 #undef ARM_FIND_VRINT_VARIANT
29698 #define ARM_FIND_VRINT_VARIANT(N) \
29699 (ARM_CHECK_BUILTIN_MODE (2) \
29700 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29701 : (ARM_CHECK_BUILTIN_MODE (4) \
29702 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29703 : NULL_TREE))
29705 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29707 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29708 switch (fn)
29710 case BUILT_IN_FLOORF:
29711 return ARM_FIND_VRINT_VARIANT (vrintm);
29712 case BUILT_IN_CEILF:
29713 return ARM_FIND_VRINT_VARIANT (vrintp);
29714 case BUILT_IN_TRUNCF:
29715 return ARM_FIND_VRINT_VARIANT (vrintz);
29716 case BUILT_IN_ROUNDF:
29717 return ARM_FIND_VRINT_VARIANT (vrinta);
29718 default:
29719 return NULL_TREE;
29722 return NULL_TREE;
29724 #undef ARM_CHECK_BUILTIN_MODE
29725 #undef ARM_FIND_VRINT_VARIANT
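/* For example, with -ffast-math on an ARMv8 FPU, a call to the scalar
   builtin floorf in a loop vectorized with V4SFmode is replaced by the
   ARM_BUILTIN_NEON_vrintmv4sf builtin (the vrintm.f32 instruction) via the
   macro-driven lookup above.  */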
29727 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29728 static HOST_WIDE_INT
29729 arm_vector_alignment (const_tree type)
29731 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29733 if (TARGET_AAPCS_BASED)
29734 align = MIN (align, 64);
29736 return align;
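/* For example, a vector type declared with __attribute__ ((vector_size (16)))
   has a 128-bit TYPE_SIZE, but under AAPCS the value returned here is capped
   at 64, matching the ABI's maximum vector alignment of 8 bytes.  */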
29739 static unsigned int
29740 arm_autovectorize_vector_sizes (void)
29742 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
29745 static bool
29746 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29748 /* Vectors which aren't in packed structures will not be less aligned than
29749 the natural alignment of their element type, so this is safe. */
29750 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29751 return !is_packed;
29753 return default_builtin_vector_alignment_reachable (type, is_packed);
29756 static bool
29757 arm_builtin_support_vector_misalignment (enum machine_mode mode,
29758 const_tree type, int misalignment,
29759 bool is_packed)
29761 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29763 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29765 if (is_packed)
29766 return align == 1;
29768 /* If the misalignment is unknown, we should be able to handle the access
29769 so long as it is not to a member of a packed data structure. */
29770 if (misalignment == -1)
29771 return true;
29773 /* Return true if the misalignment is a multiple of the natural alignment
29774 of the vector's element type. This is probably always going to be
29775 true in practice, since we've already established that this isn't a
29776 packed access. */
29777 return ((misalignment % align) == 0);
29780 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29781 is_packed);
29784 static void
29785 arm_conditional_register_usage (void)
29787 int regno;
29789 if (TARGET_THUMB1 && optimize_size)
29791 /* When optimizing for size on Thumb-1, it's better not
29792 to use the HI regs, because of the overhead of
29793 stacking them. */
29794 for (regno = FIRST_HI_REGNUM;
29795 regno <= LAST_HI_REGNUM; ++regno)
29796 fixed_regs[regno] = call_used_regs[regno] = 1;
29799 /* The link register can be clobbered by any branch insn,
29800 but we have no way to track that at present, so mark
29801 it as unavailable. */
29802 if (TARGET_THUMB1)
29803 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29805 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
29807 /* VFPv3 registers are disabled when earlier VFP
29808 versions are selected due to the definition of
29809 LAST_VFP_REGNUM. */
29810 for (regno = FIRST_VFP_REGNUM;
29811 regno <= LAST_VFP_REGNUM; ++ regno)
29813 fixed_regs[regno] = 0;
29814 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29815 || regno >= FIRST_VFP_REGNUM + 32;
29819 if (TARGET_REALLY_IWMMXT)
29821 regno = FIRST_IWMMXT_GR_REGNUM;
29822 /* The 2002/10/09 revision of the XScale ABI has wCG0
29823 and wCG1 as call-preserved registers. The 2002/11/21
29824 revision changed this so that all wCG registers are
29825 scratch registers. */
29826 for (regno = FIRST_IWMMXT_GR_REGNUM;
29827 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29828 fixed_regs[regno] = 0;
29829 /* The XScale ABI has wR0 - wR9 as scratch registers,
29830 the rest as call-preserved registers. */
29831 for (regno = FIRST_IWMMXT_REGNUM;
29832 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29834 fixed_regs[regno] = 0;
29835 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29839 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29841 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29842 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29844 else if (TARGET_APCS_STACK)
29846 fixed_regs[10] = 1;
29847 call_used_regs[10] = 1;
29849 /* -mcaller-super-interworking reserves r11 for calls to
29850 _interwork_r11_call_via_rN(). Making the register global
29851 is an easy way of ensuring that it remains valid for all
29852 calls. */
29853 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29854 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29856 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29857 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29858 if (TARGET_CALLER_INTERWORKING)
29859 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29861 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29864 static reg_class_t
29865 arm_preferred_rename_class (reg_class_t rclass)
29867 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29868    using GENERAL_REGS.  During the register rename pass we prefer LO_REGS,
29869    so that code size can be reduced. */
29870 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
29871 return LO_REGS;
29872 else
29873 return NO_REGS;
29876 /* Compute the attribute "length" of insn "*push_multi".
29877 So this function MUST be kept in sync with that insn pattern. */
29879 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
29881 int i, regno, hi_reg;
29882 int num_saves = XVECLEN (parallel_op, 0);
29884 /* ARM mode. */
29885 if (TARGET_ARM)
29886 return 4;
29887 /* Thumb1 mode. */
29888 if (TARGET_THUMB1)
29889 return 2;
29891 /* Thumb2 mode. */
29892 regno = REGNO (first_op);
29893 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29894 for (i = 1; i < num_saves && !hi_reg; i++)
29896 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
29897 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29900 if (!hi_reg)
29901 return 2;
29902 return 4;
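/* Two illustrative cases for the length computation above: in Thumb-2,
   "push {r4-r7, lr}" uses only low registers plus LR and fits the 16-bit
   encoding (length 2), whereas including a high register such as r8 forces
   the 32-bit encoding (length 4).  */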
29905 /* Compute the number of instructions emitted by output_move_double. */
29907 arm_count_output_move_double_insns (rtx *operands)
29909 int count;
29910 rtx ops[2];
29911 /* output_move_double may modify the operands array, so call it
29912 here on a copy of the array. */
29913 ops[0] = operands[0];
29914 ops[1] = operands[1];
29915 output_move_double (ops, false, &count);
29916 return count;
29920 vfp3_const_double_for_fract_bits (rtx operand)
29922 REAL_VALUE_TYPE r0;
29924 if (!CONST_DOUBLE_P (operand))
29925 return 0;
29927 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29928 if (exact_real_inverse (DFmode, &r0))
29930 if (exact_real_truncate (DFmode, &r0))
29932 HOST_WIDE_INT value = real_to_integer (&r0);
29933 value = value & 0xffffffff;
29934 if ((value != 0) && ( (value & (value - 1)) == 0))
29935 return int_log2 (value);
29938 return 0;
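/* A worked example for the test above: the constant 0.125 has the exact
   inverse 8.0, which truncates exactly to the power of two 8, so the
   function returns 3; the fixed-point conversion patterns can then use that
   value as the number of fraction bits.  */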
29942 vfp3_const_double_for_bits (rtx operand)
29944 REAL_VALUE_TYPE r0;
29946 if (!CONST_DOUBLE_P (operand))
29947 return 0;
29949 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29950 if (exact_real_truncate (DFmode, &r0))
29952 HOST_WIDE_INT value = real_to_integer (&r0);
29953 value = value & 0xffffffff;
29954 if ((value != 0) && ( (value & (value - 1)) == 0))
29955 return int_log2 (value);
29958 return 0;
29961 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29963 static void
29964 arm_pre_atomic_barrier (enum memmodel model)
29966 if (need_atomic_barrier_p (model, true))
29967 emit_insn (gen_memory_barrier ());
29970 static void
29971 arm_post_atomic_barrier (enum memmodel model)
29973 if (need_atomic_barrier_p (model, false))
29974 emit_insn (gen_memory_barrier ());
29977 /* Emit the load-exclusive and store-exclusive instructions.
29978 Use acquire and release versions if necessary. */
29980 static void
29981 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
29983 rtx (*gen) (rtx, rtx);
29985 if (acq)
29987 switch (mode)
29989 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
29990 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
29991 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
29992 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
29993 default:
29994 gcc_unreachable ();
29997 else
29999 switch (mode)
30001 case QImode: gen = gen_arm_load_exclusiveqi; break;
30002 case HImode: gen = gen_arm_load_exclusivehi; break;
30003 case SImode: gen = gen_arm_load_exclusivesi; break;
30004 case DImode: gen = gen_arm_load_exclusivedi; break;
30005 default:
30006 gcc_unreachable ();
30010 emit_insn (gen (rval, mem));
30013 static void
30014 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
30015 rtx mem, bool rel)
30017 rtx (*gen) (rtx, rtx, rtx);
30019 if (rel)
30021 switch (mode)
30023 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
30024 case HImode: gen = gen_arm_store_release_exclusivehi; break;
30025 case SImode: gen = gen_arm_store_release_exclusivesi; break;
30026 case DImode: gen = gen_arm_store_release_exclusivedi; break;
30027 default:
30028 gcc_unreachable ();
30031 else
30033 switch (mode)
30035 case QImode: gen = gen_arm_store_exclusiveqi; break;
30036 case HImode: gen = gen_arm_store_exclusivehi; break;
30037 case SImode: gen = gen_arm_store_exclusivesi; break;
30038 case DImode: gen = gen_arm_store_exclusivedi; break;
30039 default:
30040 gcc_unreachable ();
30044 emit_insn (gen (bval, rval, mem));
30047 /* Mark the previous jump instruction as unlikely. */
30049 static void
30050 emit_unlikely_jump (rtx insn)
30052 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
30054 insn = emit_jump_insn (insn);
30055 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
30058 /* Expand a compare and swap pattern. */
30060 void
30061 arm_expand_compare_and_swap (rtx operands[])
30063 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
30064 enum machine_mode mode;
30065 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
30067 bval = operands[0];
30068 rval = operands[1];
30069 mem = operands[2];
30070 oldval = operands[3];
30071 newval = operands[4];
30072 is_weak = operands[5];
30073 mod_s = operands[6];
30074 mod_f = operands[7];
30075 mode = GET_MODE (mem);
30077 /* Normally the succ memory model must be stronger than fail, but in the
30078 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
30079 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
30081 if (TARGET_HAVE_LDACQ
30082 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
30083 && INTVAL (mod_s) == MEMMODEL_RELEASE)
30084 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
30086 switch (mode)
30088 case QImode:
30089 case HImode:
30090 /* For narrow modes, we're going to perform the comparison in SImode,
30091 so do the zero-extension now. */
30092 rval = gen_reg_rtx (SImode);
30093 oldval = convert_modes (SImode, mode, oldval, true);
30094 /* FALLTHRU */
30096 case SImode:
30097 /* Force the value into a register if needed. We waited until after
30098 the zero-extension above to do this properly. */
30099 if (!arm_add_operand (oldval, SImode))
30100 oldval = force_reg (SImode, oldval);
30101 break;
30103 case DImode:
30104 if (!cmpdi_operand (oldval, mode))
30105 oldval = force_reg (mode, oldval);
30106 break;
30108 default:
30109 gcc_unreachable ();
30112 switch (mode)
30114 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
30115 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
30116 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
30117 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
30118 default:
30119 gcc_unreachable ();
30122 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
30124 if (mode == QImode || mode == HImode)
30125 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30127 /* In all cases, we arrange for success to be signaled by Z set.
30128 This arrangement allows for the boolean result to be used directly
30129 in a subsequent branch, post optimization. */
30130 x = gen_rtx_REG (CCmode, CC_REGNUM);
30131 x = gen_rtx_EQ (SImode, x, const0_rtx);
30132 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30135 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30136 another memory store between the load-exclusive and store-exclusive can
30137 reset the monitor from Exclusive to Open state. This means we must wait
30138 until after reload to split the pattern, lest we get a register spill in
30139 the middle of the atomic sequence. */
30141 void
30142 arm_split_compare_and_swap (rtx operands[])
30144 rtx rval, mem, oldval, newval, scratch;
30145 enum machine_mode mode;
30146 enum memmodel mod_s, mod_f;
30147 bool is_weak;
30148 rtx label1, label2, x, cond;
30150 rval = operands[0];
30151 mem = operands[1];
30152 oldval = operands[2];
30153 newval = operands[3];
30154 is_weak = (operands[4] != const0_rtx);
30155 mod_s = (enum memmodel) INTVAL (operands[5]);
30156 mod_f = (enum memmodel) INTVAL (operands[6]);
30157 scratch = operands[7];
30158 mode = GET_MODE (mem);
30160 bool use_acquire = TARGET_HAVE_LDACQ
30161 && !(mod_s == MEMMODEL_RELAXED
30162 || mod_s == MEMMODEL_CONSUME
30163 || mod_s == MEMMODEL_RELEASE);
30165 bool use_release = TARGET_HAVE_LDACQ
30166 && !(mod_s == MEMMODEL_RELAXED
30167 || mod_s == MEMMODEL_CONSUME
30168 || mod_s == MEMMODEL_ACQUIRE);
30170 /* Checks whether a barrier is needed and emits one accordingly. */
30171 if (!(use_acquire || use_release))
30172 arm_pre_atomic_barrier (mod_s);
30174 label1 = NULL_RTX;
30175 if (!is_weak)
30177 label1 = gen_label_rtx ();
30178 emit_label (label1);
30180 label2 = gen_label_rtx ();
30182 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30184 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30185 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30186 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30187 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30188 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30190 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30192 /* Weak or strong, we want EQ to be true for success, so that we
30193 match the flags that we got from the compare above. */
30194 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30195 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30196 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30198 if (!is_weak)
30200 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30201 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30202 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30203 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30206 if (mod_f != MEMMODEL_RELAXED)
30207 emit_label (label2);
30209 /* Checks whether a barrier is needed and emits one accordingly. */
30210 if (!(use_acquire || use_release))
30211 arm_post_atomic_barrier (mod_s);
30213 if (mod_f == MEMMODEL_RELAXED)
30214 emit_label (label2);
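/* Roughly, for a strong SImode compare-and-swap without acquire/release
   instructions available, the split above produces a loop of the shape
   (register names are placeholders):

        dmb                     @ arm_pre_atomic_barrier
     1: ldrex   rD, [rA]
        cmp     rD, rOLD
        bne     2f
        strex   rT, rNEW, [rA]
        cmp     rT, #0
        bne     1b
     2: dmb                     @ arm_post_atomic_barrier

   with the flags left so that EQ means success, as arranged for in
   arm_expand_compare_and_swap.  */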
30217 void
30218 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30219 rtx value, rtx model_rtx, rtx cond)
30221 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30222 enum machine_mode mode = GET_MODE (mem);
30223 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30224 rtx label, x;
30226 bool use_acquire = TARGET_HAVE_LDACQ
30227 && !(model == MEMMODEL_RELAXED
30228 || model == MEMMODEL_CONSUME
30229 || model == MEMMODEL_RELEASE);
30231 bool use_release = TARGET_HAVE_LDACQ
30232 && !(model == MEMMODEL_RELAXED
30233 || model == MEMMODEL_CONSUME
30234 || model == MEMMODEL_ACQUIRE);
30236 /* Checks whether a barrier is needed and emits one accordingly. */
30237 if (!(use_acquire || use_release))
30238 arm_pre_atomic_barrier (model);
30240 label = gen_label_rtx ();
30241 emit_label (label);
30243 if (new_out)
30244 new_out = gen_lowpart (wmode, new_out);
30245 if (old_out)
30246 old_out = gen_lowpart (wmode, old_out);
30247 else
30248 old_out = new_out;
30249 value = simplify_gen_subreg (wmode, value, mode, 0);
30251 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30253 switch (code)
30255 case SET:
30256 new_out = value;
30257 break;
30259 case NOT:
30260 x = gen_rtx_AND (wmode, old_out, value);
30261 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30262 x = gen_rtx_NOT (wmode, new_out);
30263 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30264 break;
30266 case MINUS:
30267 if (CONST_INT_P (value))
30269 value = GEN_INT (-INTVAL (value));
30270 code = PLUS;
30272 /* FALLTHRU */
30274 case PLUS:
30275 if (mode == DImode)
30277 /* DImode plus/minus need to clobber flags. */
30278 /* The adddi3 and subdi3 patterns are incorrectly written so that
30279 they require matching operands, even when we could easily support
30280 three operands. Thankfully, this can be fixed up post-splitting,
30281 as the individual add+adc patterns do accept three operands and
30282 post-reload cprop can make these moves go away. */
30283 emit_move_insn (new_out, old_out);
30284 if (code == PLUS)
30285 x = gen_adddi3 (new_out, new_out, value);
30286 else
30287 x = gen_subdi3 (new_out, new_out, value);
30288 emit_insn (x);
30289 break;
30291 /* FALLTHRU */
30293 default:
30294 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30295 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30296 break;
30299 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30300 use_release);
30302 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30303 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30305 /* Checks whether a barrier is needed and emits one accordingly. */
30306 if (!(use_acquire || use_release))
30307 arm_post_atomic_barrier (model);
30310 #define MAX_VECT_LEN 16
30312 struct expand_vec_perm_d
30314 rtx target, op0, op1;
30315 unsigned char perm[MAX_VECT_LEN];
30316 enum machine_mode vmode;
30317 unsigned char nelt;
30318 bool one_vector_p;
30319 bool testing_p;
30322 /* Generate a variable permutation. */
30324 static void
30325 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30327 enum machine_mode vmode = GET_MODE (target);
30328 bool one_vector_p = rtx_equal_p (op0, op1);
30330 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30331 gcc_checking_assert (GET_MODE (op0) == vmode);
30332 gcc_checking_assert (GET_MODE (op1) == vmode);
30333 gcc_checking_assert (GET_MODE (sel) == vmode);
30334 gcc_checking_assert (TARGET_NEON);
30336 if (one_vector_p)
30338 if (vmode == V8QImode)
30339 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30340 else
30341 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30343 else
30345 rtx pair;
30347 if (vmode == V8QImode)
30349 pair = gen_reg_rtx (V16QImode);
30350 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30351 pair = gen_lowpart (TImode, pair);
30352 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30354 else
30356 pair = gen_reg_rtx (OImode);
30357 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30358 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30363 void
30364 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30366 enum machine_mode vmode = GET_MODE (target);
30367 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30368 bool one_vector_p = rtx_equal_p (op0, op1);
30369 rtx rmask[MAX_VECT_LEN], mask;
30371 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30372 numbering of elements for big-endian, we must reverse the order. */
30373 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30375 /* The VTBL instruction does not use a modulo index, so we must take care
30376 of that ourselves. */
30377 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30378 for (i = 0; i < nelt; ++i)
30379 rmask[i] = mask;
30380 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30381 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30383 arm_expand_vec_perm_1 (target, op0, op1, sel);
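/* For instance, permuting two V8QImode vectors gives nelt == 8 and a mask of
   15, so a selector element of 19 is reduced to 3 before the VTBL is used;
   VEC_PERM_EXPR requires exactly this modulo behaviour, which VTBL itself
   does not provide (out-of-range lanes would read as zero instead).  */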
30386 /* Generate or test for an insn that supports a constant permutation. */
30388 /* Recognize patterns for the VUZP insns. */
30390 static bool
30391 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30393 unsigned int i, odd, mask, nelt = d->nelt;
30394 rtx out0, out1, in0, in1, x;
30395 rtx (*gen)(rtx, rtx, rtx, rtx);
30397 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30398 return false;
30400 /* Note that these are little-endian tests. Adjust for big-endian later. */
30401 if (d->perm[0] == 0)
30402 odd = 0;
30403 else if (d->perm[0] == 1)
30404 odd = 1;
30405 else
30406 return false;
30407 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30409 for (i = 0; i < nelt; i++)
30411 unsigned elt = (i * 2 + odd) & mask;
30412 if (d->perm[i] != elt)
30413 return false;
30416 /* Success! */
30417 if (d->testing_p)
30418 return true;
30420 switch (d->vmode)
30422 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30423 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30424 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30425 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30426 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30427 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30428 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30429 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30430 default:
30431 gcc_unreachable ();
30434 in0 = d->op0;
30435 in1 = d->op1;
30436 if (BYTES_BIG_ENDIAN)
30438 x = in0, in0 = in1, in1 = x;
30439 odd = !odd;
30442 out0 = d->target;
30443 out1 = gen_reg_rtx (d->vmode);
30444 if (odd)
30445 x = out0, out0 = out1, out1 = x;
30447 emit_insn (gen (out0, in0, in1, out1));
30448 return true;
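/* As an example of a permutation accepted above: for V8HImode with
   d->perm == {0, 2, 4, 6, 8, 10, 12, 14}, odd is 0 and every element matches
   (i * 2 + odd), so a single vuzp of the two input vectors is emitted and
   d->target ends up with the even-indexed lanes.  */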
30451 /* Recognize patterns for the VZIP insns. */
30453 static bool
30454 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30456 unsigned int i, high, mask, nelt = d->nelt;
30457 rtx out0, out1, in0, in1, x;
30458 rtx (*gen)(rtx, rtx, rtx, rtx);
30460 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30461 return false;
30463 /* Note that these are little-endian tests. Adjust for big-endian later. */
30464 high = nelt / 2;
30465 if (d->perm[0] == high)
30467 else if (d->perm[0] == 0)
30468 high = 0;
30469 else
30470 return false;
30471 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30473 for (i = 0; i < nelt / 2; i++)
30475 unsigned elt = (i + high) & mask;
30476 if (d->perm[i * 2] != elt)
30477 return false;
30478 elt = (elt + nelt) & mask;
30479 if (d->perm[i * 2 + 1] != elt)
30480 return false;
30483 /* Success! */
30484 if (d->testing_p)
30485 return true;
30487 switch (d->vmode)
30489 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30490 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30491 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30492 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30493 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30494 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30495 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30496 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30497 default:
30498 gcc_unreachable ();
30501 in0 = d->op0;
30502 in1 = d->op1;
30503 if (BYTES_BIG_ENDIAN)
30505 x = in0, in0 = in1, in1 = x;
30506 high = !high;
30509 out0 = d->target;
30510 out1 = gen_reg_rtx (d->vmode);
30511 if (high)
30512 x = out0, out0 = out1, out1 = x;
30514 emit_insn (gen (out0, in0, in1, out1));
30515 return true;
30518 /* Recognize patterns for the VREV insns. */
30520 static bool
30521 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30523 unsigned int i, j, diff, nelt = d->nelt;
30524 rtx (*gen)(rtx, rtx, rtx);
30526 if (!d->one_vector_p)
30527 return false;
30529 diff = d->perm[0];
30530 switch (diff)
30532 case 7:
30533 switch (d->vmode)
30535 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30536 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30537 default:
30538 return false;
30540 break;
30541 case 3:
30542 switch (d->vmode)
30544 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30545 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30546 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30547 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30548 default:
30549 return false;
30551 break;
30552 case 1:
30553 switch (d->vmode)
30555 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30556 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30557 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30558 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30559 case V4SImode: gen = gen_neon_vrev64v4si; break;
30560 case V2SImode: gen = gen_neon_vrev64v2si; break;
30561 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30562 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30563 default:
30564 return false;
30566 break;
30567 default:
30568 return false;
30571 for (i = 0; i < nelt ; i += diff + 1)
30572 for (j = 0; j <= diff; j += 1)
30574 /* This is guaranteed to be true as the value of diff
30575          is 7, 3 or 1, and we should have enough elements in the
30576 queue to generate this. Getting a vector mask with a
30577 value of diff other than these values implies that
30578 something is wrong by the time we get here. */
30579 gcc_assert (i + j < nelt);
30580 if (d->perm[i + j] != i + diff - j)
30581 return false;
30584 /* Success! */
30585 if (d->testing_p)
30586 return true;
30588 /* ??? The third operand is an artifact of the builtin infrastructure
30589 and is ignored by the actual instruction. */
30590 emit_insn (gen (d->target, d->op0, const0_rtx));
30591 return true;
30594 /* Recognize patterns for the VTRN insns. */
30596 static bool
30597 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30599 unsigned int i, odd, mask, nelt = d->nelt;
30600 rtx out0, out1, in0, in1, x;
30601 rtx (*gen)(rtx, rtx, rtx, rtx);
30603 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30604 return false;
30606 /* Note that these are little-endian tests. Adjust for big-endian later. */
30607 if (d->perm[0] == 0)
30608 odd = 0;
30609 else if (d->perm[0] == 1)
30610 odd = 1;
30611 else
30612 return false;
30613 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30615 for (i = 0; i < nelt; i += 2)
30617 if (d->perm[i] != i + odd)
30618 return false;
30619 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30620 return false;
30623 /* Success! */
30624 if (d->testing_p)
30625 return true;
30627 switch (d->vmode)
30629 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30630 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30631 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30632 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30633 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30634 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30635 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30636 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30637 default:
30638 gcc_unreachable ();
30641 in0 = d->op0;
30642 in1 = d->op1;
30643 if (BYTES_BIG_ENDIAN)
30645 x = in0, in0 = in1, in1 = x;
30646 odd = !odd;
30649 out0 = d->target;
30650 out1 = gen_reg_rtx (d->vmode);
30651 if (odd)
30652 x = out0, out0 = out1, out1 = x;
30654 emit_insn (gen (out0, in0, in1, out1));
30655 return true;
30658 /* Recognize patterns for the VEXT insns. */
30660 static bool
30661 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30663 unsigned int i, nelt = d->nelt;
30664 rtx (*gen) (rtx, rtx, rtx, rtx);
30665 rtx offset;
30667 unsigned int location;
30669 unsigned int next = d->perm[0] + 1;
30671 /* TODO: Handle GCC's numbering of elements for big-endian. */
30672 if (BYTES_BIG_ENDIAN)
30673 return false;
30675 /* Check if the extracted indexes are increasing by one. */
30676 for (i = 1; i < nelt; next++, i++)
30678 /* If we hit the most significant element of the 2nd vector in
30679 the previous iteration, no need to test further. */
30680 if (next == 2 * nelt)
30681 return false;
30683         /* If we are operating on only one vector, it could be a
30684 rotation. If there are only two elements of size < 64, let
30685 arm_evpc_neon_vrev catch it. */
30686 if (d->one_vector_p && (next == nelt))
30688 if ((nelt == 2) && (d->vmode != V2DImode))
30689 return false;
30690 else
30691 next = 0;
30694 if (d->perm[i] != next)
30695 return false;
30698 location = d->perm[0];
30700 switch (d->vmode)
30702 case V16QImode: gen = gen_neon_vextv16qi; break;
30703 case V8QImode: gen = gen_neon_vextv8qi; break;
30704 case V4HImode: gen = gen_neon_vextv4hi; break;
30705 case V8HImode: gen = gen_neon_vextv8hi; break;
30706 case V2SImode: gen = gen_neon_vextv2si; break;
30707 case V4SImode: gen = gen_neon_vextv4si; break;
30708 case V2SFmode: gen = gen_neon_vextv2sf; break;
30709 case V4SFmode: gen = gen_neon_vextv4sf; break;
30710 case V2DImode: gen = gen_neon_vextv2di; break;
30711 default:
30712 return false;
30715 /* Success! */
30716 if (d->testing_p)
30717 return true;
30719 offset = GEN_INT (location);
30720 emit_insn (gen (d->target, d->op0, d->op1, offset));
30721 return true;
30724 /* The NEON VTBL instruction is a fully variable permutation that's even
30725 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30726 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30727 can do slightly better by expanding this as a constant where we don't
30728 have to apply a mask. */
30730 static bool
30731 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30733 rtx rperm[MAX_VECT_LEN], sel;
30734 enum machine_mode vmode = d->vmode;
30735 unsigned int i, nelt = d->nelt;
30737 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30738 numbering of elements for big-endian, we must reverse the order. */
30739 if (BYTES_BIG_ENDIAN)
30740 return false;
30742 if (d->testing_p)
30743 return true;
30745   /* Generic code will try constant permutation twice: once with the
30746      original mode and again with the elements lowered to QImode.
30747      So wait, and don't do the selector expansion ourselves.  */
30748 if (vmode != V8QImode && vmode != V16QImode)
30749 return false;
30751 for (i = 0; i < nelt; ++i)
30752 rperm[i] = GEN_INT (d->perm[i]);
30753 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30754 sel = force_reg (vmode, sel);
30756 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30757 return true;
30760 static bool
30761 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30763 /* Check if the input mask matches vext before reordering the
30764 operands. */
30765 if (TARGET_NEON)
30766 if (arm_evpc_neon_vext (d))
30767 return true;
30769 /* The pattern matching functions above are written to look for a small
30770 number to begin the sequence (0, 1, N/2). If we begin with an index
30771 from the second operand, we can swap the operands. */
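  /* For example, with nelt == 4 a permutation {4, 5, 6, 7} selects only the
     second operand; after swapping op0 and op1 the indices are rewritten to
     {0, 1, 2, 3} so the matchers called below can recognise it.  */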
30772 if (d->perm[0] >= d->nelt)
30774 unsigned i, nelt = d->nelt;
30775 rtx x;
30777 for (i = 0; i < nelt; ++i)
30778 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
30780 x = d->op0;
30781 d->op0 = d->op1;
30782 d->op1 = x;
30785 if (TARGET_NEON)
30787 if (arm_evpc_neon_vuzp (d))
30788 return true;
30789 if (arm_evpc_neon_vzip (d))
30790 return true;
30791 if (arm_evpc_neon_vrev (d))
30792 return true;
30793 if (arm_evpc_neon_vtrn (d))
30794 return true;
30795 return arm_evpc_neon_vtbl (d);
30797 return false;
30800 /* Expand a vec_perm_const pattern. */
30802 bool
30803 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
30805 struct expand_vec_perm_d d;
30806 int i, nelt, which;
30808 d.target = target;
30809 d.op0 = op0;
30810 d.op1 = op1;
30812 d.vmode = GET_MODE (target);
30813 gcc_assert (VECTOR_MODE_P (d.vmode));
30814 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30815 d.testing_p = false;
30817 for (i = which = 0; i < nelt; ++i)
30819 rtx e = XVECEXP (sel, 0, i);
30820 int ei = INTVAL (e) & (2 * nelt - 1);
30821 which |= (ei < nelt ? 1 : 2);
30822 d.perm[i] = ei;
30825 switch (which)
30827 default:
30828 gcc_unreachable();
30830 case 3:
30831 d.one_vector_p = false;
30832 if (!rtx_equal_p (op0, op1))
30833 break;
30835 /* The elements of PERM do not suggest that only the first operand
30836 is used, but both operands are identical. Allow easier matching
30837 of the permutation by folding the permutation into the single
30838 input vector. */
30839 /* FALLTHRU */
30840 case 2:
30841 for (i = 0; i < nelt; ++i)
30842 d.perm[i] &= nelt - 1;
30843 d.op0 = op1;
30844 d.one_vector_p = true;
30845 break;
30847 case 1:
30848 d.op1 = op0;
30849 d.one_vector_p = true;
30850 break;
30853 return arm_expand_vec_perm_const_1 (&d);
30856 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30858 static bool
30859 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
30860 const unsigned char *sel)
30862 struct expand_vec_perm_d d;
30863 unsigned int i, nelt, which;
30864 bool ret;
30866 d.vmode = vmode;
30867 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30868 d.testing_p = true;
30869 memcpy (d.perm, sel, nelt);
30871 /* Categorize the set of elements in the selector. */
30872 for (i = which = 0; i < nelt; ++i)
30874 unsigned char e = d.perm[i];
30875 gcc_assert (e < 2 * nelt);
30876 which |= (e < nelt ? 1 : 2);
30879 /* If all elements are from the second vector, fold them to the first. */
30880 if (which == 2)
30881 for (i = 0; i < nelt; ++i)
30882 d.perm[i] -= nelt;
30884 /* Check whether the mask can be applied to the vector type. */
30885 d.one_vector_p = (which != 3);
30887 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30888 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
30889 if (!d.one_vector_p)
30890 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
30892 start_sequence ();
30893 ret = arm_expand_vec_perm_const_1 (&d);
30894 end_sequence ();
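/* Because d.testing_p is set and the expansion runs between
   start_sequence () and end_sequence (), this is a pure dry run: nothing
   is emitted and only whether a sequence could be generated is reported
   back to the vectorizer.  */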
30896 return ret;
30899 bool
30900 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
30902 /* If we are soft float and either have LDRD or the mode fits in a
30903 single word, then all auto-increment forms are OK. */
30904 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
30905 return true;
30907 switch (code)
30909 /* Post-increment and pre-decrement are supported for all instruction
30910 forms, except that vector forms do not allow pre-decrement. */
30911 case ARM_POST_INC:
30912 case ARM_PRE_DEC:
30913 if (VECTOR_MODE_P (mode))
30915 if (code != ARM_PRE_DEC)
30916 return true;
30917 else
30918 return false;
30921 return true;
30923 case ARM_POST_DEC:
30924 case ARM_PRE_INC:
30925 /* Without LDRD, when the mode size is greater than word size there is
30926 no point in auto-incrementing, because ldm and stm will not have
30927 these forms. */
30928 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
30929 return false;
30931 /* Vector and floating point modes do not support
30932 these auto increment forms. */
30933 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
30934 return false;
30936 return true;
30938 default:
30939 return false;
30943 return false;
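/* Illustrative summary of arm_autoinc_modes_ok_p (examples, not an
   exhaustive list): with NEON enabled, a V4SImode access may use
   post-increment but not pre-decrement; DFmode rejects both post-decrement
   and pre-increment; and with -msoft-float plus LDRD the early return
   above allows every form.  */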
30946 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
30947 on ARM, since we know that shifts by negative amounts are no-ops.
30948 Additionally, the default expansion code is not available or suitable
30949 for post-reload insn splits (this can occur when the register allocator
30950 chooses not to do a shift in NEON).
30952 This function is used in both initial expand and post-reload splits, and
30953 handles all kinds of 64-bit shifts.
30955 Input requirements:
30956 - It is safe for the input and output to be the same register, but
30957 early-clobber rules apply for the shift amount and scratch registers.
30958 - Shift by register requires both scratch registers. In all other cases
30959 the scratch registers may be NULL.
30960 - Ashiftrt by a register also clobbers the CC register. */
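/* Example call (illustrative only; the operand numbering is hypothetical
   rather than taken from the machine description):

     arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1],
                                    operands[2], operands[3], operands[4]);

   emits a complete 64-bit logical right shift by a register amount, while
   a shift by a constant amount may pass NULL for both scratch registers.  */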
30961 void
30962 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
30963 rtx amount, rtx scratch1, rtx scratch2)
30965 rtx out_high = gen_highpart (SImode, out);
30966 rtx out_low = gen_lowpart (SImode, out);
30967 rtx in_high = gen_highpart (SImode, in);
30968 rtx in_low = gen_lowpart (SImode, in);
30970 /* Terminology:
30971 in = the register pair containing the input value.
30972 out = the destination register pair.
30973 up = the high- or low-part of each pair.
30974 down = the opposite part to "up".
30975 In a shift, we can consider bits to shift from "up"-stream to
30976 "down"-stream, so in a left-shift "up" is the low-part and "down"
30977 is the high-part of each register pair. */
30979 rtx out_up = code == ASHIFT ? out_low : out_high;
30980 rtx out_down = code == ASHIFT ? out_high : out_low;
30981 rtx in_up = code == ASHIFT ? in_low : in_high;
30982 rtx in_down = code == ASHIFT ? in_high : in_low;
30984 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
30985 gcc_assert (out
30986 && (REG_P (out) || GET_CODE (out) == SUBREG)
30987 && GET_MODE (out) == DImode);
30988 gcc_assert (in
30989 && (REG_P (in) || GET_CODE (in) == SUBREG)
30990 && GET_MODE (in) == DImode);
30991 gcc_assert (amount
30992 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
30993 && GET_MODE (amount) == SImode)
30994 || CONST_INT_P (amount)));
30995 gcc_assert (scratch1 == NULL
30996 || (GET_CODE (scratch1) == SCRATCH)
30997 || (GET_MODE (scratch1) == SImode
30998 && REG_P (scratch1)));
30999 gcc_assert (scratch2 == NULL
31000 || (GET_CODE (scratch2) == SCRATCH)
31001 || (GET_MODE (scratch2) == SImode
31002 && REG_P (scratch2)));
31003 gcc_assert (!REG_P (out) || !REG_P (amount)
31004 || !HARD_REGISTER_P (out)
31005 || (REGNO (out) != REGNO (amount)
31006 && REGNO (out) + 1 != REGNO (amount)));
31008 /* Macros to make the following code more readable. */
31009 #define SUB_32(DEST,SRC) \
31010 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
31011 #define RSB_32(DEST,SRC) \
31012 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
31013 #define SUB_S_32(DEST,SRC) \
31014 gen_addsi3_compare0 ((DEST), (SRC), \
31015 GEN_INT (-32))
31016 #define SET(DEST,SRC) \
31017 gen_rtx_SET (SImode, (DEST), (SRC))
31018 #define SHIFT(CODE,SRC,AMOUNT) \
31019 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
31020 #define LSHIFT(CODE,SRC,AMOUNT) \
31021 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
31022 SImode, (SRC), (AMOUNT))
31023 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
31024 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
31025 SImode, (SRC), (AMOUNT))
31026 #define ORR(A,B) \
31027 gen_rtx_IOR (SImode, (A), (B))
31028 #define BRANCH(COND,LABEL) \
31029 gen_arm_cond_branch ((LABEL), \
31030 gen_rtx_ ## COND (CCmode, cc_reg, \
31031 const0_rtx), \
31032 cc_reg)
31034 /* Shifts by register and shifts by constant are handled separately. */
31035 if (CONST_INT_P (amount))
31037 /* We have a shift-by-constant. */
31039 /* First, handle out-of-range shift amounts.
31040 In both cases we try to match the result that an ARM instruction in a
31041 shift-by-register would give. This helps reduce execution
31042 differences between optimization levels, but it won't stop other
31043 parts of the compiler doing different things. This is "undefined
31044 behaviour", in any case. */
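/* For example, a constant ASHIFTRT by 64 or more becomes two arithmetic
   shifts of the high word by 31 (sign fill), which is also what an ARM
   register-specified shift of that size produces; the other shift codes
   simply move zero into the destination.  */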
31045 if (INTVAL (amount) <= 0)
31046 emit_insn (gen_movdi (out, in));
31047 else if (INTVAL (amount) >= 64)
31049 if (code == ASHIFTRT)
31051 rtx const31_rtx = GEN_INT (31);
31052 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
31053 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
31055 else
31056 emit_insn (gen_movdi (out, const0_rtx));
31059 /* Now handle valid shifts. */
31060 else if (INTVAL (amount) < 32)
31062 /* Shifts by a constant less than 32. */
31063 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
31065 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31066 emit_insn (SET (out_down,
31067 ORR (REV_LSHIFT (code, in_up, reverse_amount),
31068 out_down)));
31069 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31071 else
31073 /* Shifts by a constant greater than 31. */
31074 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
31076 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
31077 if (code == ASHIFTRT)
31078 emit_insn (gen_ashrsi3 (out_up, in_up,
31079 GEN_INT (31)));
31080 else
31081 emit_insn (SET (out_up, const0_rtx));
31084 else
31086 /* We have a shift-by-register. */
31087 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
31089 /* This alternative requires the scratch registers. */
31090 gcc_assert (scratch1 && REG_P (scratch1));
31091 gcc_assert (scratch2 && REG_P (scratch2));
31093 /* We will need the values "amount-32" and "32-amount" later.
31094 Swapping them around now allows the later code to be more general. */
31095 switch (code)
31097 case ASHIFT:
31098 emit_insn (SUB_32 (scratch1, amount));
31099 emit_insn (RSB_32 (scratch2, amount));
31100 break;
31101 case ASHIFTRT:
31102 emit_insn (RSB_32 (scratch1, amount));
31103 /* Also set CC from amount - 32, used below to test amount < 32. */
31104 emit_insn (SUB_S_32 (scratch2, amount));
31105 break;
31106 case LSHIFTRT:
31107 emit_insn (RSB_32 (scratch1, amount));
31108 emit_insn (SUB_32 (scratch2, amount));
31109 break;
31110 default:
31111 gcc_unreachable ();
31114 /* Emit code like this:
31116 arithmetic-left:
31117 out_down = in_down << amount;
31118 out_down = (in_up << (amount - 32)) | out_down;
31119 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
31120 out_up = in_up << amount;
31122 arithmetic-right:
31123 out_down = in_down >> amount;
31124 out_down = (in_up << (32 - amount)) | out_down;
31125 if (amount >= 32)
31126 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31127 out_up = in_up >> amount;
31129 logical-right:
31130 out_down = in_down >> amount;
31131 out_down = (in_up << (32 - amount)) | out_down;
31132 if (amount >= 32)
31133 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31134 out_up = in_up >> amount;
31136 The ARM and Thumb2 variants are the same but implemented slightly
31137 differently. If this were only called during expand we could just
31138 use the Thumb2 case and let combine do the right thing, but this
31139 can also be called from post-reload splitters. */
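/* Illustrative sketch only (not used by the compiler): the logical-right
   case above written as plain C on two 32-bit halves.  The helper name is
   hypothetical, and AMOUNT is assumed to be in the range 1..63 so that no
   C-level shift is out of range.  */
#if 0
static unsigned long long
example_lshiftrt_di (unsigned int in_low, unsigned int in_high,
                     unsigned int amount)
{
  unsigned int out_low, out_high;

  if (amount < 32)
    {
      /* Bits migrate from the high ("up") word down into the low word.  */
      out_low = (in_low >> amount) | (in_high << (32 - amount));
      out_high = in_high >> amount;
    }
  else
    {
      out_low = in_high >> (amount - 32);
      out_high = 0;
    }

  return ((unsigned long long) out_high << 32) | out_low;
}
#endif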
31141 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31143 if (!TARGET_THUMB2)
31145 /* Emit code for ARM mode. */
31146 emit_insn (SET (out_down,
31147 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31148 if (code == ASHIFTRT)
31150 rtx done_label = gen_label_rtx ();
31151 emit_jump_insn (BRANCH (LT, done_label));
31152 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31153 out_down)));
31154 emit_label (done_label);
31156 else
31157 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31158 out_down)));
31160 else
31162 /* Emit code for Thumb2 mode.
31163 Thumb2 can't do shift and or in one insn. */
31164 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31165 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31167 if (code == ASHIFTRT)
31169 rtx done_label = gen_label_rtx ();
31170 emit_jump_insn (BRANCH (LT, done_label));
31171 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31172 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31173 emit_label (done_label);
31175 else
31177 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31178 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31182 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31185 #undef SUB_32
31186 #undef RSB_32
31187 #undef SUB_S_32
31188 #undef SET
31189 #undef SHIFT
31190 #undef LSHIFT
31191 #undef REV_LSHIFT
31192 #undef ORR
31193 #undef BRANCH
31197 /* Return true if *COMPARISON is a comparison we can handle, putting
31198 *OP1 and *OP2 into a form the comparison patterns accept. */
31199 bool
31200 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
31202 enum rtx_code code = GET_CODE (*comparison);
31203 int code_int;
31204 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31205 ? GET_MODE (*op2) : GET_MODE (*op1);
31207 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31209 if (code == UNEQ || code == LTGT)
31210 return false;
31212 code_int = (int)code;
31213 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31214 PUT_CODE (*comparison, (enum rtx_code)code_int);
31216 switch (mode)
31218 case SImode:
31219 if (!arm_add_operand (*op1, mode))
31220 *op1 = force_reg (mode, *op1);
31221 if (!arm_add_operand (*op2, mode))
31222 *op2 = force_reg (mode, *op2);
31223 return true;
31225 case DImode:
31226 if (!cmpdi_operand (*op1, mode))
31227 *op1 = force_reg (mode, *op1);
31228 if (!cmpdi_operand (*op2, mode))
31229 *op2 = force_reg (mode, *op2);
31230 return true;
31232 case SFmode:
31233 case DFmode:
31234 if (!arm_float_compare_operand (*op1, mode))
31235 *op1 = force_reg (mode, *op1);
31236 if (!arm_float_compare_operand (*op2, mode))
31237 *op2 = force_reg (mode, *op2);
31238 return true;
31239 default:
31240 break;
31243 return false;
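/* Example of the behaviour above (illustrative): for an SImode comparison
   whose second operand is a constant rejected by arm_add_operand,
   arm_validize_comparison forces that constant into a register so the
   compare patterns can match; UNEQ and LTGT comparisons are simply
   reported as invalid.  */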
31247 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31249 static unsigned HOST_WIDE_INT
31250 arm_asan_shadow_offset (void)
31252 return (unsigned HOST_WIDE_INT) 1 << 29;
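/* With the usual shadow scale of 8 this corresponds to
   shadow = (addr >> 3) + 0x20000000; the scale is noted here only for
   illustration and is not something this hook controls.  */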
31256 /* This is a temporary fix for PR60655. Ideally we need
31257 to handle most of these cases in the generic part but
31258 currently we reject minus (..) (sym_ref). We try to
31259 ameliorate the case with minus (sym_ref1) (sym_ref2)
31260 where they are in the same section. */
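/* For example (illustrative): (minus (symbol_ref a) (symbol_ref b)) where
   both a and b are file-scope variables placed in the same section is
   treated as representable in debug info, while a difference of symbols
   from different sections is still rejected.  */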
31262 static bool
31263 arm_const_not_ok_for_debug_p (rtx p)
31265 tree decl_op0 = NULL;
31266 tree decl_op1 = NULL;
31268 if (GET_CODE (p) == MINUS)
31270 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
31272 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
31273 if (decl_op1
31274 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
31275 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
31277 if ((TREE_CODE (decl_op1) == VAR_DECL
31278 || TREE_CODE (decl_op1) == CONST_DECL)
31279 && (TREE_CODE (decl_op0) == VAR_DECL
31280 || TREE_CODE (decl_op0) == CONST_DECL))
31281 return (get_variable_section (decl_op1, false)
31282 != get_variable_section (decl_op0, false));
31284 if (TREE_CODE (decl_op1) == LABEL_DECL
31285 && TREE_CODE (decl_op0) == LABEL_DECL)
31286 return (DECL_CONTEXT (decl_op1)
31287 != DECL_CONTEXT (decl_op0));
31290 return true;
31294 return false;
31297 #include "gt-arm.h"