1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
63 /* Forward definitions of types. */
64 typedef struct minipool_node Mnode;
65 typedef struct minipool_fixup Mfix;
67 void (*arm_lang_output_object_attributes_hook)(void);
69 struct four_ints
70 {
71 int i[4];
72 };
74 /* Forward function declarations. */
75 static bool arm_lra_p (void);
76 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
77 static int arm_compute_static_chain_stack_bytes (void);
78 static arm_stack_offsets *arm_get_frame_offsets (void);
79 static void arm_add_gc_roots (void);
80 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
81 HOST_WIDE_INT, rtx, rtx, int, int);
82 static unsigned bit_count (unsigned long);
83 static int arm_address_register_rtx_p (rtx, int);
84 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
85 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
86 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
87 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
88 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
89 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
90 inline static int thumb1_index_register_rtx_p (rtx, int);
91 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
92 static int thumb_far_jump_used_p (void);
93 static bool thumb_force_lr_save (void);
94 static unsigned arm_size_return_regs (void);
95 static bool arm_assemble_integer (rtx, unsigned int, int);
96 static void arm_print_operand (FILE *, rtx, int);
97 static void arm_print_operand_address (FILE *, rtx);
98 static bool arm_print_operand_punct_valid_p (unsigned char code);
99 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
100 static arm_cc get_arm_condition_code (rtx);
101 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
102 static const char *output_multi_immediate (rtx *, const char *, const char *,
103 int, HOST_WIDE_INT);
104 static const char *shift_op (rtx, HOST_WIDE_INT *);
105 static struct machine_function *arm_init_machine_status (void);
106 static void thumb_exit (FILE *, int);
107 static HOST_WIDE_INT get_jump_table_size (rtx);
108 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_forward_ref (Mfix *);
110 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
111 static Mnode *add_minipool_backward_ref (Mfix *);
112 static void assign_minipool_offsets (Mfix *);
113 static void arm_print_value (FILE *, rtx);
114 static void dump_minipool (rtx);
115 static int arm_barrier_cost (rtx);
116 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
117 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
118 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
119 rtx);
120 static void arm_reorg (void);
121 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
122 static unsigned long arm_compute_save_reg0_reg12_mask (void);
123 static unsigned long arm_compute_save_reg_mask (void);
124 static unsigned long arm_isr_value (tree);
125 static unsigned long arm_compute_func_type (void);
126 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
127 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
128 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
129 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
130 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
131 #endif
132 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
133 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
134 static int arm_comp_type_attributes (const_tree, const_tree);
135 static void arm_set_default_type_attributes (tree);
136 static int arm_adjust_cost (rtx, rtx, rtx, int);
137 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
138 static int optimal_immediate_sequence (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence);
141 static int optimal_immediate_sequence_1 (enum rtx_code code,
142 unsigned HOST_WIDE_INT val,
143 struct four_ints *return_sequence,
144 int i);
145 static int arm_get_strip_length (int);
146 static bool arm_function_ok_for_sibcall (tree, tree);
147 static enum machine_mode arm_promote_function_mode (const_tree,
148 enum machine_mode, int *,
149 const_tree, int);
150 static bool arm_return_in_memory (const_tree, const_tree);
151 static rtx arm_function_value (const_tree, const_tree, bool);
152 static rtx arm_libcall_value_1 (enum machine_mode);
153 static rtx arm_libcall_value (enum machine_mode, const_rtx);
154 static bool arm_function_value_regno_p (const unsigned int);
155 static void arm_internal_label (FILE *, const char *, unsigned long);
156 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
157 tree);
158 static bool arm_have_conditional_execution (void);
159 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
160 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
161 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
162 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
163 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
165 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
166 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
167 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
168 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
169 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
170 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
171 static void arm_init_builtins (void);
172 static void arm_init_iwmmxt_builtins (void);
173 static rtx safe_vector_operand (rtx, enum machine_mode);
174 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
175 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
176 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
177 static tree arm_builtin_decl (unsigned, bool);
178 static void emit_constant_insn (rtx cond, rtx pattern);
179 static rtx emit_set_insn (rtx, rtx);
180 static rtx emit_multi_reg_push (unsigned long);
181 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
182 tree, bool);
183 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
184 const_tree, bool);
185 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
186 const_tree, bool);
187 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
188 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
189 const_tree);
190 static rtx aapcs_libcall_value (enum machine_mode);
191 static int aapcs_select_return_coproc (const_tree, const_tree);
193 #ifdef OBJECT_FORMAT_ELF
194 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
195 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
196 #endif
197 #ifndef ARM_PE
198 static void arm_encode_section_info (tree, rtx, int);
199 #endif
201 static void arm_file_end (void);
202 static void arm_file_start (void);
204 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 enum machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 static void arm_asm_init_sections (void);
219 #endif
220 static rtx arm_dwarf_register_span (rtx);
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
237 static bool arm_cannot_copy_insn_p (rtx);
238 static bool arm_tls_symbol_p (rtx x);
239 static int arm_issue_rate (void);
240 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
241 static bool arm_output_addr_const_extra (FILE *, rtx);
242 static bool arm_allocate_stack_slots_for_args (void);
243 static bool arm_warn_func_return (tree);
244 static const char *arm_invalid_parameter_type (const_tree t);
245 static const char *arm_invalid_return_type (const_tree t);
246 static tree arm_promoted_type (const_tree t);
247 static tree arm_convert_to_type (tree type, tree expr);
248 static bool arm_scalar_mode_supported_p (enum machine_mode);
249 static bool arm_frame_pointer_required (void);
250 static bool arm_can_eliminate (const int, const int);
251 static void arm_asm_trampoline_template (FILE *);
252 static void arm_trampoline_init (rtx, tree, rtx);
253 static rtx arm_trampoline_adjust_address (rtx);
254 static rtx arm_pic_static_addr (rtx orig, rtx reg);
255 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
256 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
257 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
258 static bool arm_array_mode_supported_p (enum machine_mode,
259 unsigned HOST_WIDE_INT);
260 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
261 static bool arm_class_likely_spilled_p (reg_class_t);
262 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
263 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
264 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
265 const_tree type,
266 int misalignment,
267 bool is_packed);
268 static void arm_conditional_register_usage (void);
269 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
270 static unsigned int arm_autovectorize_vector_sizes (void);
271 static int arm_default_branch_cost (bool, bool);
272 static int arm_cortex_a5_branch_cost (bool, bool);
273 static int arm_cortex_m_branch_cost (bool, bool);
275 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
276 const unsigned char *sel);
278 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
279 tree vectype,
280 int misalign ATTRIBUTE_UNUSED);
281 static unsigned arm_add_stmt_cost (void *data, int count,
282 enum vect_cost_for_stmt kind,
283 struct _stmt_vec_info *stmt_info,
284 int misalign,
285 enum vect_cost_model_location where);
287 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
288 bool op0_preserve_value);
289 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
291 /* Table of machine attributes. */
292 static const struct attribute_spec arm_attribute_table[] =
293 {
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
295 affects_type_identity } */
296 /* Function calls made to this symbol must be done indirectly, because
297 it may lie outside of the 26 bit addressing range of a normal function
298 call. */
299 { "long_call", 0, 0, false, true, true, NULL, false },
300 /* Whereas these functions are always known to reside within the 26 bit
301 addressing range. */
302 { "short_call", 0, 0, false, true, true, NULL, false },
303 /* Specify the procedure call conventions for a function. */
304 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
305 false },
306 /* Interrupt Service Routines have special prologue and epilogue requirements. */
307 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
308 false },
309 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
310 false },
311 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
312 false },
313 #ifdef ARM_PE
314 /* ARM/PE has three new attributes:
315 interfacearm - ?
316 dllexport - for exporting a function/variable that will live in a dll
317 dllimport - for importing a function/variable from a dll
319 Microsoft allows multiple declspecs in one __declspec, separating
320 them with spaces. We do NOT support this. Instead, use __declspec
321 multiple times.
322 */
323 { "dllimport", 0, 0, true, false, false, NULL, false },
324 { "dllexport", 0, 0, true, false, false, NULL, false },
325 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
326 false },
327 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
328 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
329 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
330 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
331 false },
332 #endif
333 { NULL, 0, 0, false, false, false, NULL, false }
334 };
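/* A minimal user-side sketch of how the attributes registered above are
   spelled in source code (illustrative only; the argument strings and
   function names are made up, and this block is not part of the build):  */
#if 0
/* May lie outside normal branch range, so call it indirectly.  */
extern int far_helper (int) __attribute__ ((long_call));

/* Interrupt service routine; gets the special prologue/epilogue.  */
void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

/* No prologue/epilogue is emitted; the body must stand on its own.  */
void reset_stub (void) __attribute__ ((naked));

/* Pin down the procedure-call standard used for this function.  */
extern double vfp_callee (double) __attribute__ ((pcs ("aapcs")));
#endif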
336 /* Initialize the GCC target structure. */
337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
338 #undef TARGET_MERGE_DECL_ATTRIBUTES
339 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
340 #endif
342 #undef TARGET_LEGITIMIZE_ADDRESS
343 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
345 #undef TARGET_LRA_P
346 #define TARGET_LRA_P arm_lra_p
348 #undef TARGET_ATTRIBUTE_TABLE
349 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
351 #undef TARGET_ASM_FILE_START
352 #define TARGET_ASM_FILE_START arm_file_start
353 #undef TARGET_ASM_FILE_END
354 #define TARGET_ASM_FILE_END arm_file_end
356 #undef TARGET_ASM_ALIGNED_SI_OP
357 #define TARGET_ASM_ALIGNED_SI_OP NULL
358 #undef TARGET_ASM_INTEGER
359 #define TARGET_ASM_INTEGER arm_assemble_integer
361 #undef TARGET_PRINT_OPERAND
362 #define TARGET_PRINT_OPERAND arm_print_operand
363 #undef TARGET_PRINT_OPERAND_ADDRESS
364 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
365 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
366 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
368 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
369 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
371 #undef TARGET_ASM_FUNCTION_PROLOGUE
372 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
374 #undef TARGET_ASM_FUNCTION_EPILOGUE
375 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
377 #undef TARGET_OPTION_OVERRIDE
378 #define TARGET_OPTION_OVERRIDE arm_option_override
380 #undef TARGET_COMP_TYPE_ATTRIBUTES
381 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
383 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
384 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
386 #undef TARGET_SCHED_ADJUST_COST
387 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
389 #undef TARGET_SCHED_REORDER
390 #define TARGET_SCHED_REORDER arm_sched_reorder
392 #undef TARGET_REGISTER_MOVE_COST
393 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
395 #undef TARGET_MEMORY_MOVE_COST
396 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
398 #undef TARGET_ENCODE_SECTION_INFO
399 #ifdef ARM_PE
400 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
401 #else
402 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
403 #endif
405 #undef TARGET_STRIP_NAME_ENCODING
406 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
408 #undef TARGET_ASM_INTERNAL_LABEL
409 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
412 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
414 #undef TARGET_FUNCTION_VALUE
415 #define TARGET_FUNCTION_VALUE arm_function_value
417 #undef TARGET_LIBCALL_VALUE
418 #define TARGET_LIBCALL_VALUE arm_libcall_value
420 #undef TARGET_FUNCTION_VALUE_REGNO_P
421 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
423 #undef TARGET_ASM_OUTPUT_MI_THUNK
424 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
425 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
426 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
428 #undef TARGET_RTX_COSTS
429 #define TARGET_RTX_COSTS arm_rtx_costs
430 #undef TARGET_ADDRESS_COST
431 #define TARGET_ADDRESS_COST arm_address_cost
433 #undef TARGET_SHIFT_TRUNCATION_MASK
434 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
435 #undef TARGET_VECTOR_MODE_SUPPORTED_P
436 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
437 #undef TARGET_ARRAY_MODE_SUPPORTED_P
438 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
439 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
440 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
441 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
442 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
443 arm_autovectorize_vector_sizes
445 #undef TARGET_MACHINE_DEPENDENT_REORG
446 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
448 #undef TARGET_INIT_BUILTINS
449 #define TARGET_INIT_BUILTINS arm_init_builtins
450 #undef TARGET_EXPAND_BUILTIN
451 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
452 #undef TARGET_BUILTIN_DECL
453 #define TARGET_BUILTIN_DECL arm_builtin_decl
455 #undef TARGET_INIT_LIBFUNCS
456 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
458 #undef TARGET_PROMOTE_FUNCTION_MODE
459 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
460 #undef TARGET_PROMOTE_PROTOTYPES
461 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
462 #undef TARGET_PASS_BY_REFERENCE
463 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
466 #undef TARGET_FUNCTION_ARG
467 #define TARGET_FUNCTION_ARG arm_function_arg
468 #undef TARGET_FUNCTION_ARG_ADVANCE
469 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
470 #undef TARGET_FUNCTION_ARG_BOUNDARY
471 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
476 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
477 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
479 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
480 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
481 #undef TARGET_TRAMPOLINE_INIT
482 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
483 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
484 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
486 #undef TARGET_WARN_FUNC_RETURN
487 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
489 #undef TARGET_DEFAULT_SHORT_ENUMS
490 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
492 #undef TARGET_ALIGN_ANON_BITFIELD
493 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
495 #undef TARGET_NARROW_VOLATILE_BITFIELD
496 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
498 #undef TARGET_CXX_GUARD_TYPE
499 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
501 #undef TARGET_CXX_GUARD_MASK_BIT
502 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
504 #undef TARGET_CXX_GET_COOKIE_SIZE
505 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
507 #undef TARGET_CXX_COOKIE_HAS_SIZE
508 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
510 #undef TARGET_CXX_CDTOR_RETURNS_THIS
511 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
513 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
514 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
516 #undef TARGET_CXX_USE_AEABI_ATEXIT
517 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
519 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
520 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
521 arm_cxx_determine_class_data_visibility
523 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
524 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
526 #undef TARGET_RETURN_IN_MSB
527 #define TARGET_RETURN_IN_MSB arm_return_in_msb
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
532 #undef TARGET_MUST_PASS_IN_STACK
533 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
535 #if ARM_UNWIND_INFO
536 #undef TARGET_ASM_UNWIND_EMIT
537 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
539 /* EABI unwinding tables use a different format for the typeinfo tables. */
540 #undef TARGET_ASM_TTYPE
541 #define TARGET_ASM_TTYPE arm_output_ttype
543 #undef TARGET_ARM_EABI_UNWINDER
544 #define TARGET_ARM_EABI_UNWINDER true
546 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
547 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
549 #undef TARGET_ASM_INIT_SECTIONS
550 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
551 #endif /* ARM_UNWIND_INFO */
553 #undef TARGET_DWARF_REGISTER_SPAN
554 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
556 #undef TARGET_CANNOT_COPY_INSN_P
557 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
559 #ifdef HAVE_AS_TLS
560 #undef TARGET_HAVE_TLS
561 #define TARGET_HAVE_TLS true
562 #endif
564 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
565 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
567 #undef TARGET_LEGITIMATE_CONSTANT_P
568 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
570 #undef TARGET_CANNOT_FORCE_CONST_MEM
571 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
573 #undef TARGET_MAX_ANCHOR_OFFSET
574 #define TARGET_MAX_ANCHOR_OFFSET 4095
576 /* The minimum is set such that the total size of the block
577 for a particular anchor is 4088 + 1 + 4095 bytes, which is
578 divisible by eight, ensuring natural spacing of anchors. */
579 #undef TARGET_MIN_ANCHOR_OFFSET
580 #define TARGET_MIN_ANCHOR_OFFSET -4088
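/* Worked check of the arithmetic above: with these settings an anchor can
   reach offsets -4088 through +4095 inclusive, i.e.
   4095 - (-4088) + 1 = 8184 bytes, and 8184 = 8 * 1023.  */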
582 #undef TARGET_SCHED_ISSUE_RATE
583 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
585 #undef TARGET_MANGLE_TYPE
586 #define TARGET_MANGLE_TYPE arm_mangle_type
588 #undef TARGET_BUILD_BUILTIN_VA_LIST
589 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
590 #undef TARGET_EXPAND_BUILTIN_VA_START
591 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
592 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
593 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
595 #ifdef HAVE_AS_TLS
596 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
597 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
598 #endif
600 #undef TARGET_LEGITIMATE_ADDRESS_P
601 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
603 #undef TARGET_PREFERRED_RELOAD_CLASS
604 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
606 #undef TARGET_INVALID_PARAMETER_TYPE
607 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
609 #undef TARGET_INVALID_RETURN_TYPE
610 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
612 #undef TARGET_PROMOTED_TYPE
613 #define TARGET_PROMOTED_TYPE arm_promoted_type
615 #undef TARGET_CONVERT_TO_TYPE
616 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
618 #undef TARGET_SCALAR_MODE_SUPPORTED_P
619 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
621 #undef TARGET_FRAME_POINTER_REQUIRED
622 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
624 #undef TARGET_CAN_ELIMINATE
625 #define TARGET_CAN_ELIMINATE arm_can_eliminate
627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
628 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
630 #undef TARGET_CLASS_LIKELY_SPILLED_P
631 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
633 #undef TARGET_VECTORIZE_BUILTINS
634 #define TARGET_VECTORIZE_BUILTINS
636 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
637 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
638 arm_builtin_vectorized_function
640 #undef TARGET_VECTOR_ALIGNMENT
641 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
643 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
644 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
645 arm_vector_alignment_reachable
647 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
648 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
649 arm_builtin_support_vector_misalignment
651 #undef TARGET_PREFERRED_RENAME_CLASS
652 #define TARGET_PREFERRED_RENAME_CLASS \
653 arm_preferred_rename_class
655 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
656 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
657 arm_vectorize_vec_perm_const_ok
659 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
660 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
661 arm_builtin_vectorization_cost
662 #undef TARGET_VECTORIZE_ADD_STMT_COST
663 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
665 #undef TARGET_CANONICALIZE_COMPARISON
666 #define TARGET_CANONICALIZE_COMPARISON \
667 arm_canonicalize_comparison
669 #undef TARGET_ASAN_SHADOW_OFFSET
670 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
672 #undef MAX_INSN_PER_IT_BLOCK
673 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
675 #undef TARGET_CAN_USE_DOLOOP_P
676 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
678 struct gcc_target targetm = TARGET_INITIALIZER;
680 /* Obstack for minipool constant handling. */
681 static struct obstack minipool_obstack;
682 static char * minipool_startobj;
684 /* The maximum number of insns skipped which
685 will be conditionalised if possible. */
686 static int max_insns_skipped = 5;
688 extern FILE * asm_out_file;
690 /* True if we are currently building a constant table. */
691 int making_const_table;
693 /* The processor for which instructions should be scheduled. */
694 enum processor_type arm_tune = arm_none;
696 /* The current tuning set. */
697 const struct tune_params *current_tune;
699 /* Which floating point hardware to schedule for. */
700 int arm_fpu_attr;
702 /* Which floating point hardware to use. */
703 const struct arm_fpu_desc *arm_fpu_desc;
705 /* Used for Thumb call_via trampolines. */
706 rtx thumb_call_via_label[14];
707 static int thumb_call_reg_needed;
709 /* Bit values used to identify processor capabilities. */
710 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
711 #define FL_ARCH3M (1 << 1) /* Extended multiply */
712 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
713 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
714 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
715 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
716 #define FL_THUMB (1 << 6) /* Thumb aware */
717 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
718 #define FL_STRONG (1 << 8) /* StrongARM */
719 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
720 #define FL_XSCALE (1 << 10) /* XScale */
721 /* spare (1 << 11) */
722 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
723 media instructions. */
724 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
725 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
726 Note: ARM6 & 7 derivatives only. */
727 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
728 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
729 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
730 profile. */
731 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
732 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
733 #define FL_NEON (1 << 20) /* Neon instructions. */
734 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
735 architecture. */
736 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
737 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
738 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
739 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
741 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
742 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
744 /* Flags that only affect tuning, not available instructions. */
745 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
746 | FL_CO_PROC)
748 #define FL_FOR_ARCH2 FL_NOTM
749 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
750 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
751 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
752 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
753 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
754 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
755 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
756 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
757 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
758 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
759 #define FL_FOR_ARCH6J FL_FOR_ARCH6
760 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
761 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
762 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
763 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
764 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
765 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
766 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
767 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
768 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
769 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
770 #define FL_FOR_ARCH8A (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
771 | FL_ARM_DIV | FL_NOTM)
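/* A rough sketch of how these flag words are meant to be consumed
   (assumed from the option handling later in the file, which is not part
   of this excerpt): the selected CPU/architecture contributes one of the
   FL_FOR_ARCH* masks to insn_flags, and individual capabilities are then
   read back with simple bit tests.  */
#if 0
static int
example_has_thumb2 (unsigned long flags)
{
  /* E.g. an ARMv7-A selection supplies FL_FOR_ARCH7A, which by the
     definitions above includes FL_THUMB2 via FL_FOR_ARCH6T2.  */
  return (flags & FL_THUMB2) != 0;
}
#endif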
773 /* The bits in this mask specify which
774 instructions we are allowed to generate. */
775 static unsigned long insn_flags = 0;
777 /* The bits in this mask specify which instruction scheduling options should
778 be used. */
779 static unsigned long tune_flags = 0;
781 /* The highest ARM architecture version supported by the
782 target. */
783 enum base_architecture arm_base_arch = BASE_ARCH_0;
785 /* The following are used in the arm.md file as equivalents to bits
786 in the above two flag variables. */
788 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
789 int arm_arch3m = 0;
791 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
792 int arm_arch4 = 0;
794 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
795 int arm_arch4t = 0;
797 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
798 int arm_arch5 = 0;
800 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
801 int arm_arch5e = 0;
803 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
804 int arm_arch6 = 0;
806 /* Nonzero if this chip supports the ARM 6K extensions. */
807 int arm_arch6k = 0;
809 /* Nonzero if instructions present in ARMv6-M can be used. */
810 int arm_arch6m = 0;
812 /* Nonzero if this chip supports the ARM 7 extensions. */
813 int arm_arch7 = 0;
815 /* Nonzero if instructions not present in the 'M' profile can be used. */
816 int arm_arch_notm = 0;
818 /* Nonzero if instructions present in ARMv7E-M can be used. */
819 int arm_arch7em = 0;
821 /* Nonzero if instructions present in ARMv8 can be used. */
822 int arm_arch8 = 0;
824 /* Nonzero if this chip can benefit from load scheduling. */
825 int arm_ld_sched = 0;
827 /* Nonzero if this chip is a StrongARM. */
828 int arm_tune_strongarm = 0;
830 /* Nonzero if this chip supports Intel Wireless MMX technology. */
831 int arm_arch_iwmmxt = 0;
833 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
834 int arm_arch_iwmmxt2 = 0;
836 /* Nonzero if this chip is an XScale. */
837 int arm_arch_xscale = 0;
839 /* Nonzero if tuning for XScale */
840 int arm_tune_xscale = 0;
842 /* Nonzero if we want to tune for stores that access the write-buffer.
843 This typically means an ARM6 or ARM7 with MMU or MPU. */
844 int arm_tune_wbuf = 0;
846 /* Nonzero if tuning for Cortex-A9. */
847 int arm_tune_cortex_a9 = 0;
849 /* Nonzero if generating Thumb instructions. */
850 int thumb_code = 0;
852 /* Nonzero if generating Thumb-1 instructions. */
853 int thumb1_code = 0;
855 /* Nonzero if we should define __THUMB_INTERWORK__ in the
856 preprocessor.
857 XXX This is a bit of a hack; it's intended to help work around
858 problems in GLD, which doesn't understand that armv5t code is
859 interworking clean. */
860 int arm_cpp_interwork = 0;
862 /* Nonzero if chip supports Thumb 2. */
863 int arm_arch_thumb2;
865 /* Nonzero if chip supports integer division instruction. */
866 int arm_arch_arm_hwdiv;
867 int arm_arch_thumb_hwdiv;
869 /* Nonzero if we should use Neon to handle 64-bit operations rather
870 than core registers. */
871 int prefer_neon_for_64bits = 0;
873 /* Nonzero if we shouldn't use literal pools. */
874 bool arm_disable_literal_pool = false;
876 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
877 we must report the mode of the memory reference from
878 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
879 enum machine_mode output_memory_reference_mode;
881 /* The register number to be used for the PIC offset register. */
882 unsigned arm_pic_register = INVALID_REGNUM;
884 /* Set to 1 after arm_reorg has started.  Reset at the start of
885 the next function. */
886 static int after_arm_reorg = 0;
888 enum arm_pcs arm_pcs_default;
890 /* For an explanation of these variables, see final_prescan_insn below. */
891 int arm_ccfsm_state;
892 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
893 enum arm_cond_code arm_current_cc;
895 rtx arm_target_insn;
896 int arm_target_label;
897 /* The number of conditionally executed insns, including the current insn. */
898 int arm_condexec_count = 0;
899 /* A bitmask specifying the patterns for the IT block.
900 Zero means do not output an IT block before this insn. */
901 int arm_condexec_mask = 0;
902 /* The number of bits used in arm_condexec_mask. */
903 int arm_condexec_masklen = 0;
905 /* Nonzero if chip supports the ARMv8 CRC instructions. */
906 int arm_arch_crc = 0;
908 /* The condition codes of the ARM, and the inverse function. */
909 static const char * const arm_condition_codes[] =
910 {
911 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
912 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
913 };
915 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
916 int arm_regs_in_sequence[] =
917 {
918 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
919 };
921 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
922 #define streq(string1, string2) (strcmp (string1, string2) == 0)
924 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
925 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
926 | (1 << PIC_OFFSET_TABLE_REGNUM)))
928 /* Initialization code. */
930 struct processors
931 {
932 const char *const name;
933 enum processor_type core;
934 const char *arch;
935 enum base_architecture base_arch;
936 const unsigned long flags;
937 const struct tune_params *const tune;
938 };
941 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
942 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
943 prefetch_slots, \
944 l1_size, \
945 l1_line_size
947 /* arm generic vectorizer costs. */
948 static const
949 struct cpu_vec_costs arm_default_vec_cost = {
950 1, /* scalar_stmt_cost. */
951 1, /* scalar load_cost. */
952 1, /* scalar_store_cost. */
953 1, /* vec_stmt_cost. */
954 1, /* vec_to_scalar_cost. */
955 1, /* scalar_to_vec_cost. */
956 1, /* vec_align_load_cost. */
957 1, /* vec_unalign_load_cost. */
958 1, /* vec_unalign_store_cost. */
959 1, /* vec_store_cost. */
960 3, /* cond_taken_branch_cost. */
961 1, /* cond_not_taken_branch_cost. */
962 };
964 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
965 #include "aarch-cost-tables.h"
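/* Note on reading the tables below: the entries are *extra* costs, in the
   units produced by COSTS_N_INSNS (defined in rtl.h), added on top of the
   baseline cost of an instruction.  A 0 entry therefore means "no cost
   beyond a plain single instruction", e.g. a shift folded into an ALU
   operand on cores that list Shift as 0.  */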
969 const struct cpu_cost_table cortexa9_extra_costs =
971 /* ALU */
973 0, /* Arith. */
974 0, /* Logical. */
975 0, /* Shift. */
976 COSTS_N_INSNS (1), /* Shift_reg. */
977 COSTS_N_INSNS (1), /* Arith_shift. */
978 COSTS_N_INSNS (2), /* Arith_shift_reg. */
979 0, /* Log_shift. */
980 COSTS_N_INSNS (1), /* Log_shift_reg. */
981 COSTS_N_INSNS (1), /* Extend. */
982 COSTS_N_INSNS (2), /* Extend_arith. */
983 COSTS_N_INSNS (1), /* Bfi. */
984 COSTS_N_INSNS (1), /* Bfx. */
985 0, /* Clz. */
986 0, /* non_exec. */
987 true /* non_exec_costs_exec. */
990 /* MULT SImode */
992 COSTS_N_INSNS (3), /* Simple. */
993 COSTS_N_INSNS (3), /* Flag_setting. */
994 COSTS_N_INSNS (2), /* Extend. */
995 COSTS_N_INSNS (3), /* Add. */
996 COSTS_N_INSNS (2), /* Extend_add. */
997 COSTS_N_INSNS (30) /* Idiv. No HW div on Cortex A9. */
999 /* MULT DImode */
1001 0, /* Simple (N/A). */
1002 0, /* Flag_setting (N/A). */
1003 COSTS_N_INSNS (4), /* Extend. */
1004 0, /* Add (N/A). */
1005 COSTS_N_INSNS (4), /* Extend_add. */
1006 0 /* Idiv (N/A). */
1009 /* LD/ST */
1011 COSTS_N_INSNS (2), /* Load. */
1012 COSTS_N_INSNS (2), /* Load_sign_extend. */
1013 COSTS_N_INSNS (2), /* Ldrd. */
1014 COSTS_N_INSNS (2), /* Ldm_1st. */
1015 1, /* Ldm_regs_per_insn_1st. */
1016 2, /* Ldm_regs_per_insn_subsequent. */
1017 COSTS_N_INSNS (5), /* Loadf. */
1018 COSTS_N_INSNS (5), /* Loadd. */
1019 COSTS_N_INSNS (1), /* Load_unaligned. */
1020 COSTS_N_INSNS (2), /* Store. */
1021 COSTS_N_INSNS (2), /* Strd. */
1022 COSTS_N_INSNS (2), /* Stm_1st. */
1023 1, /* Stm_regs_per_insn_1st. */
1024 2, /* Stm_regs_per_insn_subsequent. */
1025 COSTS_N_INSNS (1), /* Storef. */
1026 COSTS_N_INSNS (1), /* Stored. */
1027 COSTS_N_INSNS (1) /* Store_unaligned. */
1030 /* FP SFmode */
1032 COSTS_N_INSNS (14), /* Div. */
1033 COSTS_N_INSNS (4), /* Mult. */
1034 COSTS_N_INSNS (7), /* Mult_addsub. */
1035 COSTS_N_INSNS (30), /* Fma. */
1036 COSTS_N_INSNS (3), /* Addsub. */
1037 COSTS_N_INSNS (1), /* Fpconst. */
1038 COSTS_N_INSNS (1), /* Neg. */
1039 COSTS_N_INSNS (3), /* Compare. */
1040 COSTS_N_INSNS (3), /* Widen. */
1041 COSTS_N_INSNS (3), /* Narrow. */
1042 COSTS_N_INSNS (3), /* Toint. */
1043 COSTS_N_INSNS (3), /* Fromint. */
1044 COSTS_N_INSNS (3) /* Roundint. */
1046 /* FP DFmode */
1048 COSTS_N_INSNS (24), /* Div. */
1049 COSTS_N_INSNS (5), /* Mult. */
1050 COSTS_N_INSNS (8), /* Mult_addsub. */
1051 COSTS_N_INSNS (30), /* Fma. */
1052 COSTS_N_INSNS (3), /* Addsub. */
1053 COSTS_N_INSNS (1), /* Fpconst. */
1054 COSTS_N_INSNS (1), /* Neg. */
1055 COSTS_N_INSNS (3), /* Compare. */
1056 COSTS_N_INSNS (3), /* Widen. */
1057 COSTS_N_INSNS (3), /* Narrow. */
1058 COSTS_N_INSNS (3), /* Toint. */
1059 COSTS_N_INSNS (3), /* Fromint. */
1060 COSTS_N_INSNS (3) /* Roundint. */
1063 /* Vector */
1065 COSTS_N_INSNS (1) /* Alu. */
1070 const struct cpu_cost_table cortexa7_extra_costs =
1072 /* ALU */
1074 0, /* Arith. */
1075 0, /* Logical. */
1076 COSTS_N_INSNS (1), /* Shift. */
1077 COSTS_N_INSNS (1), /* Shift_reg. */
1078 COSTS_N_INSNS (1), /* Arith_shift. */
1079 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1080 COSTS_N_INSNS (1), /* Log_shift. */
1081 COSTS_N_INSNS (1), /* Log_shift_reg. */
1082 COSTS_N_INSNS (1), /* Extend. */
1083 COSTS_N_INSNS (1), /* Extend_arith. */
1084 COSTS_N_INSNS (1), /* Bfi. */
1085 COSTS_N_INSNS (1), /* Bfx. */
1086 COSTS_N_INSNS (1), /* Clz. */
1087 0, /* non_exec. */
1088 true /* non_exec_costs_exec. */
1092 /* MULT SImode */
1094 0, /* Simple. */
1095 COSTS_N_INSNS (1), /* Flag_setting. */
1096 COSTS_N_INSNS (1), /* Extend. */
1097 COSTS_N_INSNS (1), /* Add. */
1098 COSTS_N_INSNS (1), /* Extend_add. */
1099 COSTS_N_INSNS (7) /* Idiv. */
1101 /* MULT DImode */
1103 0, /* Simple (N/A). */
1104 0, /* Flag_setting (N/A). */
1105 COSTS_N_INSNS (1), /* Extend. */
1106 0, /* Add. */
1107 COSTS_N_INSNS (2), /* Extend_add. */
1108 0 /* Idiv (N/A). */
1111 /* LD/ST */
1113 COSTS_N_INSNS (1), /* Load. */
1114 COSTS_N_INSNS (1), /* Load_sign_extend. */
1115 COSTS_N_INSNS (3), /* Ldrd. */
1116 COSTS_N_INSNS (1), /* Ldm_1st. */
1117 1, /* Ldm_regs_per_insn_1st. */
1118 2, /* Ldm_regs_per_insn_subsequent. */
1119 COSTS_N_INSNS (2), /* Loadf. */
1120 COSTS_N_INSNS (2), /* Loadd. */
1121 COSTS_N_INSNS (1), /* Load_unaligned. */
1122 COSTS_N_INSNS (1), /* Store. */
1123 COSTS_N_INSNS (3), /* Strd. */
1124 COSTS_N_INSNS (1), /* Stm_1st. */
1125 1, /* Stm_regs_per_insn_1st. */
1126 2, /* Stm_regs_per_insn_subsequent. */
1127 COSTS_N_INSNS (2), /* Storef. */
1128 COSTS_N_INSNS (2), /* Stored. */
1129 COSTS_N_INSNS (1) /* Store_unaligned. */
1132 /* FP SFmode */
1134 COSTS_N_INSNS (15), /* Div. */
1135 COSTS_N_INSNS (3), /* Mult. */
1136 COSTS_N_INSNS (7), /* Mult_addsub. */
1137 COSTS_N_INSNS (7), /* Fma. */
1138 COSTS_N_INSNS (3), /* Addsub. */
1139 COSTS_N_INSNS (3), /* Fpconst. */
1140 COSTS_N_INSNS (3), /* Neg. */
1141 COSTS_N_INSNS (3), /* Compare. */
1142 COSTS_N_INSNS (3), /* Widen. */
1143 COSTS_N_INSNS (3), /* Narrow. */
1144 COSTS_N_INSNS (3), /* Toint. */
1145 COSTS_N_INSNS (3), /* Fromint. */
1146 COSTS_N_INSNS (3) /* Roundint. */
1148 /* FP DFmode */
1150 COSTS_N_INSNS (30), /* Div. */
1151 COSTS_N_INSNS (6), /* Mult. */
1152 COSTS_N_INSNS (10), /* Mult_addsub. */
1153 COSTS_N_INSNS (7), /* Fma. */
1154 COSTS_N_INSNS (3), /* Addsub. */
1155 COSTS_N_INSNS (3), /* Fpconst. */
1156 COSTS_N_INSNS (3), /* Neg. */
1157 COSTS_N_INSNS (3), /* Compare. */
1158 COSTS_N_INSNS (3), /* Widen. */
1159 COSTS_N_INSNS (3), /* Narrow. */
1160 COSTS_N_INSNS (3), /* Toint. */
1161 COSTS_N_INSNS (3), /* Fromint. */
1162 COSTS_N_INSNS (3) /* Roundint. */
1165 /* Vector */
1167 COSTS_N_INSNS (1) /* Alu. */
1171 const struct cpu_cost_table cortexa12_extra_costs =
1173 /* ALU */
1175 0, /* Arith. */
1176 0, /* Logical. */
1177 0, /* Shift. */
1178 COSTS_N_INSNS (1), /* Shift_reg. */
1179 COSTS_N_INSNS (1), /* Arith_shift. */
1180 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1181 COSTS_N_INSNS (1), /* Log_shift. */
1182 COSTS_N_INSNS (1), /* Log_shift_reg. */
1183 0, /* Extend. */
1184 COSTS_N_INSNS (1), /* Extend_arith. */
1185 0, /* Bfi. */
1186 COSTS_N_INSNS (1), /* Bfx. */
1187 COSTS_N_INSNS (1), /* Clz. */
1188 0, /* non_exec. */
1189 true /* non_exec_costs_exec. */
1191 /* MULT SImode */
1194 COSTS_N_INSNS (2), /* Simple. */
1195 COSTS_N_INSNS (3), /* Flag_setting. */
1196 COSTS_N_INSNS (2), /* Extend. */
1197 COSTS_N_INSNS (3), /* Add. */
1198 COSTS_N_INSNS (2), /* Extend_add. */
1199 COSTS_N_INSNS (18) /* Idiv. */
1201 /* MULT DImode */
1203 0, /* Simple (N/A). */
1204 0, /* Flag_setting (N/A). */
1205 COSTS_N_INSNS (3), /* Extend. */
1206 0, /* Add (N/A). */
1207 COSTS_N_INSNS (3), /* Extend_add. */
1208 0 /* Idiv (N/A). */
1211 /* LD/ST */
1213 COSTS_N_INSNS (3), /* Load. */
1214 COSTS_N_INSNS (3), /* Load_sign_extend. */
1215 COSTS_N_INSNS (3), /* Ldrd. */
1216 COSTS_N_INSNS (3), /* Ldm_1st. */
1217 1, /* Ldm_regs_per_insn_1st. */
1218 2, /* Ldm_regs_per_insn_subsequent. */
1219 COSTS_N_INSNS (3), /* Loadf. */
1220 COSTS_N_INSNS (3), /* Loadd. */
1221 0, /* Load_unaligned. */
1222 0, /* Store. */
1223 0, /* Strd. */
1224 0, /* Stm_1st. */
1225 1, /* Stm_regs_per_insn_1st. */
1226 2, /* Stm_regs_per_insn_subsequent. */
1227 COSTS_N_INSNS (2), /* Storef. */
1228 COSTS_N_INSNS (2), /* Stored. */
1229 0 /* Store_unaligned. */
1232 /* FP SFmode */
1234 COSTS_N_INSNS (17), /* Div. */
1235 COSTS_N_INSNS (4), /* Mult. */
1236 COSTS_N_INSNS (8), /* Mult_addsub. */
1237 COSTS_N_INSNS (8), /* Fma. */
1238 COSTS_N_INSNS (4), /* Addsub. */
1239 COSTS_N_INSNS (2), /* Fpconst. */
1240 COSTS_N_INSNS (2), /* Neg. */
1241 COSTS_N_INSNS (2), /* Compare. */
1242 COSTS_N_INSNS (4), /* Widen. */
1243 COSTS_N_INSNS (4), /* Narrow. */
1244 COSTS_N_INSNS (4), /* Toint. */
1245 COSTS_N_INSNS (4), /* Fromint. */
1246 COSTS_N_INSNS (4) /* Roundint. */
1248 /* FP DFmode */
1250 COSTS_N_INSNS (31), /* Div. */
1251 COSTS_N_INSNS (4), /* Mult. */
1252 COSTS_N_INSNS (8), /* Mult_addsub. */
1253 COSTS_N_INSNS (8), /* Fma. */
1254 COSTS_N_INSNS (4), /* Addsub. */
1255 COSTS_N_INSNS (2), /* Fpconst. */
1256 COSTS_N_INSNS (2), /* Neg. */
1257 COSTS_N_INSNS (2), /* Compare. */
1258 COSTS_N_INSNS (4), /* Widen. */
1259 COSTS_N_INSNS (4), /* Narrow. */
1260 COSTS_N_INSNS (4), /* Toint. */
1261 COSTS_N_INSNS (4), /* Fromint. */
1262 COSTS_N_INSNS (4) /* Roundint. */
1265 /* Vector */
1267 COSTS_N_INSNS (1) /* Alu. */
1271 const struct cpu_cost_table cortexa15_extra_costs =
1273 /* ALU */
1275 0, /* Arith. */
1276 0, /* Logical. */
1277 0, /* Shift. */
1278 0, /* Shift_reg. */
1279 COSTS_N_INSNS (1), /* Arith_shift. */
1280 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1281 COSTS_N_INSNS (1), /* Log_shift. */
1282 COSTS_N_INSNS (1), /* Log_shift_reg. */
1283 0, /* Extend. */
1284 COSTS_N_INSNS (1), /* Extend_arith. */
1285 COSTS_N_INSNS (1), /* Bfi. */
1286 0, /* Bfx. */
1287 0, /* Clz. */
1288 0, /* non_exec. */
1289 true /* non_exec_costs_exec. */
1291 /* MULT SImode */
1294 COSTS_N_INSNS (2), /* Simple. */
1295 COSTS_N_INSNS (3), /* Flag_setting. */
1296 COSTS_N_INSNS (2), /* Extend. */
1297 COSTS_N_INSNS (2), /* Add. */
1298 COSTS_N_INSNS (2), /* Extend_add. */
1299 COSTS_N_INSNS (18) /* Idiv. */
1301 /* MULT DImode */
1303 0, /* Simple (N/A). */
1304 0, /* Flag_setting (N/A). */
1305 COSTS_N_INSNS (3), /* Extend. */
1306 0, /* Add (N/A). */
1307 COSTS_N_INSNS (3), /* Extend_add. */
1308 0 /* Idiv (N/A). */
1311 /* LD/ST */
1313 COSTS_N_INSNS (3), /* Load. */
1314 COSTS_N_INSNS (3), /* Load_sign_extend. */
1315 COSTS_N_INSNS (3), /* Ldrd. */
1316 COSTS_N_INSNS (4), /* Ldm_1st. */
1317 1, /* Ldm_regs_per_insn_1st. */
1318 2, /* Ldm_regs_per_insn_subsequent. */
1319 COSTS_N_INSNS (4), /* Loadf. */
1320 COSTS_N_INSNS (4), /* Loadd. */
1321 0, /* Load_unaligned. */
1322 0, /* Store. */
1323 0, /* Strd. */
1324 COSTS_N_INSNS (1), /* Stm_1st. */
1325 1, /* Stm_regs_per_insn_1st. */
1326 2, /* Stm_regs_per_insn_subsequent. */
1327 0, /* Storef. */
1328 0, /* Stored. */
1329 0 /* Store_unaligned. */
1332 /* FP SFmode */
1334 COSTS_N_INSNS (17), /* Div. */
1335 COSTS_N_INSNS (4), /* Mult. */
1336 COSTS_N_INSNS (8), /* Mult_addsub. */
1337 COSTS_N_INSNS (8), /* Fma. */
1338 COSTS_N_INSNS (4), /* Addsub. */
1339 COSTS_N_INSNS (2), /* Fpconst. */
1340 COSTS_N_INSNS (2), /* Neg. */
1341 COSTS_N_INSNS (5), /* Compare. */
1342 COSTS_N_INSNS (4), /* Widen. */
1343 COSTS_N_INSNS (4), /* Narrow. */
1344 COSTS_N_INSNS (4), /* Toint. */
1345 COSTS_N_INSNS (4), /* Fromint. */
1346 COSTS_N_INSNS (4) /* Roundint. */
1348 /* FP DFmode */
1350 COSTS_N_INSNS (31), /* Div. */
1351 COSTS_N_INSNS (4), /* Mult. */
1352 COSTS_N_INSNS (8), /* Mult_addsub. */
1353 COSTS_N_INSNS (8), /* Fma. */
1354 COSTS_N_INSNS (4), /* Addsub. */
1355 COSTS_N_INSNS (2), /* Fpconst. */
1356 COSTS_N_INSNS (2), /* Neg. */
1357 COSTS_N_INSNS (2), /* Compare. */
1358 COSTS_N_INSNS (4), /* Widen. */
1359 COSTS_N_INSNS (4), /* Narrow. */
1360 COSTS_N_INSNS (4), /* Toint. */
1361 COSTS_N_INSNS (4), /* Fromint. */
1362 COSTS_N_INSNS (4) /* Roundint. */
1365 /* Vector */
1367 COSTS_N_INSNS (1) /* Alu. */
1371 const struct cpu_cost_table v7m_extra_costs =
1373 /* ALU */
1375 0, /* Arith. */
1376 0, /* Logical. */
1377 0, /* Shift. */
1378 0, /* Shift_reg. */
1379 0, /* Arith_shift. */
1380 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1381 0, /* Log_shift. */
1382 COSTS_N_INSNS (1), /* Log_shift_reg. */
1383 0, /* Extend. */
1384 COSTS_N_INSNS (1), /* Extend_arith. */
1385 0, /* Bfi. */
1386 0, /* Bfx. */
1387 0, /* Clz. */
1388 COSTS_N_INSNS (1), /* non_exec. */
1389 false /* non_exec_costs_exec. */
1392 /* MULT SImode */
1394 COSTS_N_INSNS (1), /* Simple. */
1395 COSTS_N_INSNS (1), /* Flag_setting. */
1396 COSTS_N_INSNS (2), /* Extend. */
1397 COSTS_N_INSNS (1), /* Add. */
1398 COSTS_N_INSNS (3), /* Extend_add. */
1399 COSTS_N_INSNS (8) /* Idiv. */
1401 /* MULT DImode */
1403 0, /* Simple (N/A). */
1404 0, /* Flag_setting (N/A). */
1405 COSTS_N_INSNS (2), /* Extend. */
1406 0, /* Add (N/A). */
1407 COSTS_N_INSNS (3), /* Extend_add. */
1408 0 /* Idiv (N/A). */
1411 /* LD/ST */
1413 COSTS_N_INSNS (2), /* Load. */
1414 0, /* Load_sign_extend. */
1415 COSTS_N_INSNS (3), /* Ldrd. */
1416 COSTS_N_INSNS (2), /* Ldm_1st. */
1417 1, /* Ldm_regs_per_insn_1st. */
1418 1, /* Ldm_regs_per_insn_subsequent. */
1419 COSTS_N_INSNS (2), /* Loadf. */
1420 COSTS_N_INSNS (3), /* Loadd. */
1421 COSTS_N_INSNS (1), /* Load_unaligned. */
1422 COSTS_N_INSNS (2), /* Store. */
1423 COSTS_N_INSNS (3), /* Strd. */
1424 COSTS_N_INSNS (2), /* Stm_1st. */
1425 1, /* Stm_regs_per_insn_1st. */
1426 1, /* Stm_regs_per_insn_subsequent. */
1427 COSTS_N_INSNS (2), /* Storef. */
1428 COSTS_N_INSNS (3), /* Stored. */
1429 COSTS_N_INSNS (1) /* Store_unaligned. */
1432 /* FP SFmode */
1434 COSTS_N_INSNS (7), /* Div. */
1435 COSTS_N_INSNS (2), /* Mult. */
1436 COSTS_N_INSNS (5), /* Mult_addsub. */
1437 COSTS_N_INSNS (3), /* Fma. */
1438 COSTS_N_INSNS (1), /* Addsub. */
1439 0, /* Fpconst. */
1440 0, /* Neg. */
1441 0, /* Compare. */
1442 0, /* Widen. */
1443 0, /* Narrow. */
1444 0, /* Toint. */
1445 0, /* Fromint. */
1446 0 /* Roundint. */
1448 /* FP DFmode */
1450 COSTS_N_INSNS (15), /* Div. */
1451 COSTS_N_INSNS (5), /* Mult. */
1452 COSTS_N_INSNS (7), /* Mult_addsub. */
1453 COSTS_N_INSNS (7), /* Fma. */
1454 COSTS_N_INSNS (3), /* Addsub. */
1455 0, /* Fpconst. */
1456 0, /* Neg. */
1457 0, /* Compare. */
1458 0, /* Widen. */
1459 0, /* Narrow. */
1460 0, /* Toint. */
1461 0, /* Fromint. */
1462 0 /* Roundint. */
1465 /* Vector */
1467 COSTS_N_INSNS (1) /* Alu. */
1471 const struct tune_params arm_slowmul_tune =
1473 arm_slowmul_rtx_costs,
1474 NULL,
1475 NULL, /* Sched adj cost. */
1476 3, /* Constant limit. */
1477 5, /* Max cond insns. */
1478 ARM_PREFETCH_NOT_BENEFICIAL,
1479 true, /* Prefer constant pool. */
1480 arm_default_branch_cost,
1481 false, /* Prefer LDRD/STRD. */
1482 {true, true}, /* Prefer non short circuit. */
1483 &arm_default_vec_cost, /* Vectorizer costs. */
1484 false /* Prefer Neon for 64-bits bitops. */
1487 const struct tune_params arm_fastmul_tune =
1489 arm_fastmul_rtx_costs,
1490 NULL,
1491 NULL, /* Sched adj cost. */
1492 1, /* Constant limit. */
1493 5, /* Max cond insns. */
1494 ARM_PREFETCH_NOT_BENEFICIAL,
1495 true, /* Prefer constant pool. */
1496 arm_default_branch_cost,
1497 false, /* Prefer LDRD/STRD. */
1498 {true, true}, /* Prefer non short circuit. */
1499 &arm_default_vec_cost, /* Vectorizer costs. */
1500 false /* Prefer Neon for 64-bits bitops. */
1503 /* StrongARM has early execution of branches, so a sequence that is worth
1504 skipping is shorter. Set max_insns_skipped to a lower value. */
1506 const struct tune_params arm_strongarm_tune =
1508 arm_fastmul_rtx_costs,
1509 NULL,
1510 NULL, /* Sched adj cost. */
1511 1, /* Constant limit. */
1512 3, /* Max cond insns. */
1513 ARM_PREFETCH_NOT_BENEFICIAL,
1514 true, /* Prefer constant pool. */
1515 arm_default_branch_cost,
1516 false, /* Prefer LDRD/STRD. */
1517 {true, true}, /* Prefer non short circuit. */
1518 &arm_default_vec_cost, /* Vectorizer costs. */
1519 false /* Prefer Neon for 64-bits bitops. */
1522 const struct tune_params arm_xscale_tune =
1524 arm_xscale_rtx_costs,
1525 NULL,
1526 xscale_sched_adjust_cost,
1527 2, /* Constant limit. */
1528 3, /* Max cond insns. */
1529 ARM_PREFETCH_NOT_BENEFICIAL,
1530 true, /* Prefer constant pool. */
1531 arm_default_branch_cost,
1532 false, /* Prefer LDRD/STRD. */
1533 {true, true}, /* Prefer non short circuit. */
1534 &arm_default_vec_cost, /* Vectorizer costs. */
1535 false /* Prefer Neon for 64-bits bitops. */
1538 const struct tune_params arm_9e_tune =
1540 arm_9e_rtx_costs,
1541 NULL,
1542 NULL, /* Sched adj cost. */
1543 1, /* Constant limit. */
1544 5, /* Max cond insns. */
1545 ARM_PREFETCH_NOT_BENEFICIAL,
1546 true, /* Prefer constant pool. */
1547 arm_default_branch_cost,
1548 false, /* Prefer LDRD/STRD. */
1549 {true, true}, /* Prefer non short circuit. */
1550 &arm_default_vec_cost, /* Vectorizer costs. */
1551 false /* Prefer Neon for 64-bits bitops. */
1554 const struct tune_params arm_v6t2_tune =
1556 arm_9e_rtx_costs,
1557 NULL,
1558 NULL, /* Sched adj cost. */
1559 1, /* Constant limit. */
1560 5, /* Max cond insns. */
1561 ARM_PREFETCH_NOT_BENEFICIAL,
1562 false, /* Prefer constant pool. */
1563 arm_default_branch_cost,
1564 false, /* Prefer LDRD/STRD. */
1565 {true, true}, /* Prefer non short circuit. */
1566 &arm_default_vec_cost, /* Vectorizer costs. */
1567 false /* Prefer Neon for 64-bits bitops. */
1570 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1571 const struct tune_params arm_cortex_tune =
1573 arm_9e_rtx_costs,
1574 &generic_extra_costs,
1575 NULL, /* Sched adj cost. */
1576 1, /* Constant limit. */
1577 5, /* Max cond insns. */
1578 ARM_PREFETCH_NOT_BENEFICIAL,
1579 false, /* Prefer constant pool. */
1580 arm_default_branch_cost,
1581 false, /* Prefer LDRD/STRD. */
1582 {true, true}, /* Prefer non short circuit. */
1583 &arm_default_vec_cost, /* Vectorizer costs. */
1584 false /* Prefer Neon for 64-bits bitops. */
1587 const struct tune_params arm_cortex_a7_tune =
1589 arm_9e_rtx_costs,
1590 &cortexa7_extra_costs,
1591 NULL,
1592 1, /* Constant limit. */
1593 5, /* Max cond insns. */
1594 ARM_PREFETCH_NOT_BENEFICIAL,
1595 false, /* Prefer constant pool. */
1596 arm_default_branch_cost,
1597 false, /* Prefer LDRD/STRD. */
1598 {true, true}, /* Prefer non short circuit. */
1599 &arm_default_vec_cost, /* Vectorizer costs. */
1600 false /* Prefer Neon for 64-bits bitops. */
1603 const struct tune_params arm_cortex_a15_tune =
1605 arm_9e_rtx_costs,
1606 &cortexa15_extra_costs,
1607 NULL, /* Sched adj cost. */
1608 1, /* Constant limit. */
1609 2, /* Max cond insns. */
1610 ARM_PREFETCH_NOT_BENEFICIAL,
1611 false, /* Prefer constant pool. */
1612 arm_default_branch_cost,
1613 true, /* Prefer LDRD/STRD. */
1614 {true, true}, /* Prefer non short circuit. */
1615 &arm_default_vec_cost, /* Vectorizer costs. */
1616 false /* Prefer Neon for 64-bits bitops. */
1619 const struct tune_params arm_cortex_a53_tune =
1621 arm_9e_rtx_costs,
1622 &cortexa53_extra_costs,
1623 NULL, /* Scheduler cost adjustment. */
1624 1, /* Constant limit. */
1625 5, /* Max cond insns. */
1626 ARM_PREFETCH_NOT_BENEFICIAL,
1627 false, /* Prefer constant pool. */
1628 arm_default_branch_cost,
1629 false, /* Prefer LDRD/STRD. */
1630 {true, true}, /* Prefer non short circuit. */
1631 &arm_default_vec_cost, /* Vectorizer costs. */
1632 false /* Prefer Neon for 64-bits bitops. */
1635 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1636 less appealing. Set max_insns_skipped to a low value. */
1638 const struct tune_params arm_cortex_a5_tune =
1640 arm_9e_rtx_costs,
1641 NULL,
1642 NULL, /* Sched adj cost. */
1643 1, /* Constant limit. */
1644 1, /* Max cond insns. */
1645 ARM_PREFETCH_NOT_BENEFICIAL,
1646 false, /* Prefer constant pool. */
1647 arm_cortex_a5_branch_cost,
1648 false, /* Prefer LDRD/STRD. */
1649 {false, false}, /* Prefer non short circuit. */
1650 &arm_default_vec_cost, /* Vectorizer costs. */
1651 false /* Prefer Neon for 64-bits bitops. */
1654 const struct tune_params arm_cortex_a9_tune =
1656 arm_9e_rtx_costs,
1657 &cortexa9_extra_costs,
1658 cortex_a9_sched_adjust_cost,
1659 1, /* Constant limit. */
1660 5, /* Max cond insns. */
1661 ARM_PREFETCH_BENEFICIAL(4,32,32),
1662 false, /* Prefer constant pool. */
1663 arm_default_branch_cost,
1664 false, /* Prefer LDRD/STRD. */
1665 {true, true}, /* Prefer non short circuit. */
1666 &arm_default_vec_cost, /* Vectorizer costs. */
1667 false /* Prefer Neon for 64-bits bitops. */
1670 const struct tune_params arm_cortex_a12_tune =
1672 arm_9e_rtx_costs,
1673 &cortexa12_extra_costs,
1674 NULL,
1675 1, /* Constant limit. */
1676 5, /* Max cond insns. */
1677 ARM_PREFETCH_BENEFICIAL(4,32,32),
1678 false, /* Prefer constant pool. */
1679 arm_default_branch_cost,
1680 true, /* Prefer LDRD/STRD. */
1681 {true, true}, /* Prefer non short circuit. */
1682 &arm_default_vec_cost, /* Vectorizer costs. */
1683 false /* Prefer Neon for 64-bits bitops. */
1686 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT each take a
1687 single cycle (so two cycles for the pair). An LDR from the constant pool also takes two cycles
1688 to execute, but mildly increases pipelining opportunity (consecutive
1689 loads/stores can be pipelined together, saving one cycle), and may also
1690 improve icache utilisation. Hence we prefer the constant pool for such
1691 processors. */
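/* For illustration: materialising 0x12345678 on such a core can be done
   either with the pair
       movw r0, #0x5678
       movt r0, #0x1234
   (one cycle each), or with a single PC-relative literal-pool load
       ldr  r0, .Lconst
   (two cycles, but it may pipeline with neighbouring loads/stores),
   which is why "Prefer constant pool" is set to true below.  */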
1693 const struct tune_params arm_v7m_tune =
1695 arm_9e_rtx_costs,
1696 &v7m_extra_costs,
1697 NULL, /* Sched adj cost. */
1698 1, /* Constant limit. */
1699 2, /* Max cond insns. */
1700 ARM_PREFETCH_NOT_BENEFICIAL,
1701 true, /* Prefer constant pool. */
1702 arm_cortex_m_branch_cost,
1703 false, /* Prefer LDRD/STRD. */
1704 {false, false}, /* Prefer non short circuit. */
1705 &arm_default_vec_cost, /* Vectorizer costs. */
1706 false /* Prefer Neon for 64-bits bitops. */
1709 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1710 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1711 const struct tune_params arm_v6m_tune =
1713 arm_9e_rtx_costs,
1714 NULL,
1715 NULL, /* Sched adj cost. */
1716 1, /* Constant limit. */
1717 5, /* Max cond insns. */
1718 ARM_PREFETCH_NOT_BENEFICIAL,
1719 false, /* Prefer constant pool. */
1720 arm_default_branch_cost,
1721 false, /* Prefer LDRD/STRD. */
1722 {false, false}, /* Prefer non short circuit. */
1723 &arm_default_vec_cost, /* Vectorizer costs. */
1724 false /* Prefer Neon for 64-bits bitops. */
1727 const struct tune_params arm_fa726te_tune =
1729 arm_9e_rtx_costs,
1730 NULL,
1731 fa726te_sched_adjust_cost,
1732 1, /* Constant limit. */
1733 5, /* Max cond insns. */
1734 ARM_PREFETCH_NOT_BENEFICIAL,
1735 true, /* Prefer constant pool. */
1736 arm_default_branch_cost,
1737 false, /* Prefer LDRD/STRD. */
1738 {true, true}, /* Prefer non short circuit. */
1739 &arm_default_vec_cost, /* Vectorizer costs. */
1740 false /* Prefer Neon for 64-bits bitops. */
1744 /* Not all of these give usefully different compilation alternatives,
1745 but there is no simple way of generalizing them. */
1746 static const struct processors all_cores[] =
1748 /* ARM Cores */
1749 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1750 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1751 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1752 #include "arm-cores.def"
1753 #undef ARM_CORE
1754 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
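/* For instance, a hypothetical arm-cores.def entry of the form
   ARM_CORE ("cortex-a9", cortexa9, cortexa9, 7A, flags, cortex_a9)
   expands via the macro above to
   { "cortex-a9", cortexa9, "7A", BASE_ARCH_7A, flags | FL_FOR_ARCH7A,
     &arm_cortex_a9_tune }.  */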
1757 static const struct processors all_architectures[] =
1759 /* ARM Architectures */
1760 /* We don't specify tuning costs here as it will be figured out
1761 from the core. */
1763 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1764 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1765 #include "arm-arches.def"
1766 #undef ARM_ARCH
1767 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1771 /* These are populated as command-line arguments are processed, or NULL
1772 if not specified. */
1773 static const struct processors *arm_selected_arch;
1774 static const struct processors *arm_selected_cpu;
1775 static const struct processors *arm_selected_tune;
1777 /* The name of the preprocessor macro to define for this architecture. */
1779 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1781 /* Available values for -mfpu=. */
1783 static const struct arm_fpu_desc all_fpus[] =
1785 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1786 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1787 #include "arm-fpus.def"
1788 #undef ARM_FPU
1792 /* Supported TLS relocations. */
1794 enum tls_reloc {
1795 TLS_GD32,
1796 TLS_LDM32,
1797 TLS_LDO32,
1798 TLS_IE32,
1799 TLS_LE32,
1800 TLS_DESCSEQ /* GNU scheme */
1803 /* The maximum number of insns to be used when loading a constant. */
1804 inline static int
1805 arm_constant_limit (bool size_p)
1807 return size_p ? 1 : current_tune->constant_limit;
1810 /* Emit an insn that's a simple single-set. Both the operands must be known
1811 to be valid. */
1812 inline static rtx
1813 emit_set_insn (rtx x, rtx y)
1815 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1818 /* Return the number of bits set in VALUE. */
1819 static unsigned
1820 bit_count (unsigned long value)
1822 unsigned long count = 0;
1824 while (value)
1826 count++;
1827 value &= value - 1; /* Clear the least-significant set bit. */
1830 return count;
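/* E.g. bit_count (0x90001) == 3; each iteration of the loop above clears
   exactly one set bit, so the loop executes once per set bit rather than
   once per bit position.  */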
1833 typedef struct
1835 enum machine_mode mode;
1836 const char *name;
1837 } arm_fixed_mode_set;
1839 /* A small helper for setting fixed-point optab libfuncs. */
1841 static void
1842 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1843 const char *funcname, const char *modename,
1844 int num_suffix)
1846 char buffer[50];
1848 if (num_suffix == 0)
1849 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1850 else
1851 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1853 set_optab_libfunc (optable, mode, buffer);
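/* E.g. arm_set_fixed_optab_libfunc (add_optab, SAmode, "add", "sa", 3)
   registers the name "__gnu_addsa3" for SAmode addition.  */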
1856 static void
1857 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1858 enum machine_mode from, const char *funcname,
1859 const char *toname, const char *fromname)
1861 char buffer[50];
1862 const char *maybe_suffix_2 = "";
1864 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1865 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1866 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1867 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1868 maybe_suffix_2 = "2";
1870 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1871 maybe_suffix_2);
1873 set_conv_libfunc (optable, to, from, buffer);
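/* E.g. a conversion from SQmode to DQmode (both signed fract modes) gets
   the name "__gnu_fractsqdq2", whereas SQmode to SImode (the target is
   not a fixed-point mode) gets "__gnu_fractsqsi" with no "2" suffix.  */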
1876 /* Set up library functions unique to ARM. */
1878 static void
1879 arm_init_libfuncs (void)
1881 /* For Linux, we have access to kernel support for atomic operations. */
1882 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1883 init_sync_libfuncs (2 * UNITS_PER_WORD);
1885 /* There are no special library functions unless we are using the
1886 ARM BPABI. */
1887 if (!TARGET_BPABI)
1888 return;
1890 /* The functions below are described in Section 4 of the "Run-Time
1891 ABI for the ARM architecture", Version 1.0. */
1893 /* Double-precision floating-point arithmetic. Table 2. */
1894 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1895 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1896 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1897 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1898 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1900 /* Double-precision comparisons. Table 3. */
1901 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1902 set_optab_libfunc (ne_optab, DFmode, NULL);
1903 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1904 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1905 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1906 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1907 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1909 /* Single-precision floating-point arithmetic. Table 4. */
1910 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1911 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1912 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1913 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1914 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1916 /* Single-precision comparisons. Table 5. */
1917 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1918 set_optab_libfunc (ne_optab, SFmode, NULL);
1919 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1920 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1921 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1922 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1923 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1925 /* Floating-point to integer conversions. Table 6. */
1926 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1927 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1928 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1929 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1930 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1931 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1932 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1933 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1935 /* Conversions between floating types. Table 7. */
1936 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1937 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1939 /* Integer to floating-point conversions. Table 8. */
1940 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1941 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1942 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1943 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1944 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1945 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1946 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1947 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1949 /* Long long. Table 9. */
1950 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1951 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1952 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1953 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1954 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1955 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1956 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1957 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1959 /* Integer (32/32->32) division. \S 4.3.1. */
1960 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1961 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1963 /* The divmod functions are designed so that they can be used for
1964 plain division, even though they return both the quotient and the
1965 remainder. The quotient is returned in the usual location (i.e.,
1966 r0 for SImode, {r0, r1} for DImode), just as would be expected
1967 for an ordinary division routine. Because the AAPCS calling
1968 conventions specify that all of { r0, r1, r2, r3 } are
1969 call-clobbered registers, there is no need to tell the compiler
1970 explicitly that those registers are clobbered by these
1971 routines. */
1972 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1973 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1975 /* For SImode division the ABI provides div-without-mod routines,
1976 which are faster. */
1977 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1978 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1980 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1981 divmod libcalls instead. */
1982 set_optab_libfunc (smod_optab, DImode, NULL);
1983 set_optab_libfunc (umod_optab, DImode, NULL);
1984 set_optab_libfunc (smod_optab, SImode, NULL);
1985 set_optab_libfunc (umod_optab, SImode, NULL);
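/* Thus, for example, a 32-bit signed division "a / b" on a core without
   hardware divide becomes a call to "__aeabi_idiv" with the quotient
   returned in r0, while "a % b" is expanded through "__aeabi_idivmod",
   which returns the remainder in r1.  */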
1987 /* Half-precision float operations. The compiler handles all operations
1988 with NULL libfuncs by converting to SFmode. */
1989 switch (arm_fp16_format)
1991 case ARM_FP16_FORMAT_IEEE:
1992 case ARM_FP16_FORMAT_ALTERNATIVE:
1994 /* Conversions. */
1995 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1996 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1997 ? "__gnu_f2h_ieee"
1998 : "__gnu_f2h_alternative"));
1999 set_conv_libfunc (sext_optab, SFmode, HFmode,
2000 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2001 ? "__gnu_h2f_ieee"
2002 : "__gnu_h2f_alternative"));
2004 /* Arithmetic. */
2005 set_optab_libfunc (add_optab, HFmode, NULL);
2006 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2007 set_optab_libfunc (smul_optab, HFmode, NULL);
2008 set_optab_libfunc (neg_optab, HFmode, NULL);
2009 set_optab_libfunc (sub_optab, HFmode, NULL);
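/* With these optabs left NULL, an HFmode operation such as an addition
   is performed by widening both operands to SFmode (via the conversion
   libfuncs above when no hardware conversion exists), operating in
   SFmode, and truncating the result back to HFmode.  */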
2011 /* Comparisons. */
2012 set_optab_libfunc (eq_optab, HFmode, NULL);
2013 set_optab_libfunc (ne_optab, HFmode, NULL);
2014 set_optab_libfunc (lt_optab, HFmode, NULL);
2015 set_optab_libfunc (le_optab, HFmode, NULL);
2016 set_optab_libfunc (ge_optab, HFmode, NULL);
2017 set_optab_libfunc (gt_optab, HFmode, NULL);
2018 set_optab_libfunc (unord_optab, HFmode, NULL);
2019 break;
2021 default:
2022 break;
2025 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2027 const arm_fixed_mode_set fixed_arith_modes[] =
2029 { QQmode, "qq" },
2030 { UQQmode, "uqq" },
2031 { HQmode, "hq" },
2032 { UHQmode, "uhq" },
2033 { SQmode, "sq" },
2034 { USQmode, "usq" },
2035 { DQmode, "dq" },
2036 { UDQmode, "udq" },
2037 { TQmode, "tq" },
2038 { UTQmode, "utq" },
2039 { HAmode, "ha" },
2040 { UHAmode, "uha" },
2041 { SAmode, "sa" },
2042 { USAmode, "usa" },
2043 { DAmode, "da" },
2044 { UDAmode, "uda" },
2045 { TAmode, "ta" },
2046 { UTAmode, "uta" }
2048 const arm_fixed_mode_set fixed_conv_modes[] =
2050 { QQmode, "qq" },
2051 { UQQmode, "uqq" },
2052 { HQmode, "hq" },
2053 { UHQmode, "uhq" },
2054 { SQmode, "sq" },
2055 { USQmode, "usq" },
2056 { DQmode, "dq" },
2057 { UDQmode, "udq" },
2058 { TQmode, "tq" },
2059 { UTQmode, "utq" },
2060 { HAmode, "ha" },
2061 { UHAmode, "uha" },
2062 { SAmode, "sa" },
2063 { USAmode, "usa" },
2064 { DAmode, "da" },
2065 { UDAmode, "uda" },
2066 { TAmode, "ta" },
2067 { UTAmode, "uta" },
2068 { QImode, "qi" },
2069 { HImode, "hi" },
2070 { SImode, "si" },
2071 { DImode, "di" },
2072 { TImode, "ti" },
2073 { SFmode, "sf" },
2074 { DFmode, "df" }
2076 unsigned int i, j;
2078 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2080 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2081 "add", fixed_arith_modes[i].name, 3);
2082 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2083 "ssadd", fixed_arith_modes[i].name, 3);
2084 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2085 "usadd", fixed_arith_modes[i].name, 3);
2086 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2087 "sub", fixed_arith_modes[i].name, 3);
2088 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2089 "sssub", fixed_arith_modes[i].name, 3);
2090 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2091 "ussub", fixed_arith_modes[i].name, 3);
2092 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2093 "mul", fixed_arith_modes[i].name, 3);
2094 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2095 "ssmul", fixed_arith_modes[i].name, 3);
2096 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2097 "usmul", fixed_arith_modes[i].name, 3);
2098 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2099 "div", fixed_arith_modes[i].name, 3);
2100 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2101 "udiv", fixed_arith_modes[i].name, 3);
2102 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2103 "ssdiv", fixed_arith_modes[i].name, 3);
2104 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2105 "usdiv", fixed_arith_modes[i].name, 3);
2106 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2107 "neg", fixed_arith_modes[i].name, 2);
2108 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2109 "ssneg", fixed_arith_modes[i].name, 2);
2110 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2111 "usneg", fixed_arith_modes[i].name, 2);
2112 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2113 "ashl", fixed_arith_modes[i].name, 3);
2114 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2115 "ashr", fixed_arith_modes[i].name, 3);
2116 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2117 "lshr", fixed_arith_modes[i].name, 3);
2118 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2119 "ssashl", fixed_arith_modes[i].name, 3);
2120 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2121 "usashl", fixed_arith_modes[i].name, 3);
2122 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2123 "cmp", fixed_arith_modes[i].name, 2);
2126 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2127 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2129 if (i == j
2130 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2131 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2132 continue;
2134 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2135 fixed_conv_modes[j].mode, "fract",
2136 fixed_conv_modes[i].name,
2137 fixed_conv_modes[j].name);
2138 arm_set_fixed_conv_libfunc (satfract_optab,
2139 fixed_conv_modes[i].mode,
2140 fixed_conv_modes[j].mode, "satfract",
2141 fixed_conv_modes[i].name,
2142 fixed_conv_modes[j].name);
2143 arm_set_fixed_conv_libfunc (fractuns_optab,
2144 fixed_conv_modes[i].mode,
2145 fixed_conv_modes[j].mode, "fractuns",
2146 fixed_conv_modes[i].name,
2147 fixed_conv_modes[j].name);
2148 arm_set_fixed_conv_libfunc (satfractuns_optab,
2149 fixed_conv_modes[i].mode,
2150 fixed_conv_modes[j].mode, "satfractuns",
2151 fixed_conv_modes[i].name,
2152 fixed_conv_modes[j].name);
2156 if (TARGET_AAPCS_BASED)
2157 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2160 /* On AAPCS systems, this is the "struct __va_list". */
2161 static GTY(()) tree va_list_type;
2163 /* Return the type to use as __builtin_va_list. */
2164 static tree
2165 arm_build_builtin_va_list (void)
2167 tree va_list_name;
2168 tree ap_field;
2170 if (!TARGET_AAPCS_BASED)
2171 return std_build_builtin_va_list ();
2173 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2174 defined as:
2176 struct __va_list
2177 {
2178 void *__ap;
2179 };
2181 The C Library ABI further reinforces this definition in \S
2182 4.1.
2184 We must follow this definition exactly. The structure tag
2185 name is visible in C++ mangled names, and thus forms a part
2186 of the ABI. The field name may be used by people who
2187 #include <stdarg.h>. */
2188 /* Create the type. */
2189 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2190 /* Give it the required name. */
2191 va_list_name = build_decl (BUILTINS_LOCATION,
2192 TYPE_DECL,
2193 get_identifier ("__va_list"),
2194 va_list_type);
2195 DECL_ARTIFICIAL (va_list_name) = 1;
2196 TYPE_NAME (va_list_type) = va_list_name;
2197 TYPE_STUB_DECL (va_list_type) = va_list_name;
2198 /* Create the __ap field. */
2199 ap_field = build_decl (BUILTINS_LOCATION,
2200 FIELD_DECL,
2201 get_identifier ("__ap"),
2202 ptr_type_node);
2203 DECL_ARTIFICIAL (ap_field) = 1;
2204 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2205 TYPE_FIELDS (va_list_type) = ap_field;
2206 /* Compute its layout. */
2207 layout_type (va_list_type);
2209 return va_list_type;
2212 /* Return an expression of type "void *" pointing to the next
2213 available argument in a variable-argument list. VALIST is the
2214 user-level va_list object, of type __builtin_va_list. */
2215 static tree
2216 arm_extract_valist_ptr (tree valist)
2218 if (TREE_TYPE (valist) == error_mark_node)
2219 return error_mark_node;
2221 /* On an AAPCS target, the pointer is stored within "struct
2222 va_list". */
2223 if (TARGET_AAPCS_BASED)
2225 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2226 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2227 valist, ap_field, NULL_TREE);
2230 return valist;
2233 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2234 static void
2235 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2237 valist = arm_extract_valist_ptr (valist);
2238 std_expand_builtin_va_start (valist, nextarg);
2241 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2242 static tree
2243 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2244 gimple_seq *post_p)
2246 valist = arm_extract_valist_ptr (valist);
2247 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2250 /* Fix up any incompatible options that the user has specified. */
2251 static void
2252 arm_option_override (void)
2254 if (global_options_set.x_arm_arch_option)
2255 arm_selected_arch = &all_architectures[arm_arch_option];
2257 if (global_options_set.x_arm_cpu_option)
2259 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2260 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2263 if (global_options_set.x_arm_tune_option)
2264 arm_selected_tune = &all_cores[(int) arm_tune_option];
2266 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2267 SUBTARGET_OVERRIDE_OPTIONS;
2268 #endif
2270 if (arm_selected_arch)
2272 if (arm_selected_cpu)
2274 /* Check for conflict between mcpu and march. */
2275 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2277 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2278 arm_selected_cpu->name, arm_selected_arch->name);
2279 /* -march wins for code generation.
2280 -mcpu wins for default tuning. */
2281 if (!arm_selected_tune)
2282 arm_selected_tune = arm_selected_cpu;
2284 arm_selected_cpu = arm_selected_arch;
2286 else
2287 /* -mcpu wins. */
2288 arm_selected_arch = NULL;
2290 else
2291 /* Pick a CPU based on the architecture. */
2292 arm_selected_cpu = arm_selected_arch;
2295 /* If the user did not specify a processor, choose one for them. */
2296 if (!arm_selected_cpu)
2298 const struct processors * sel;
2299 unsigned int sought;
2301 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2302 if (!arm_selected_cpu->name)
2304 #ifdef SUBTARGET_CPU_DEFAULT
2305 /* Use the subtarget default CPU if none was specified by
2306 configure. */
2307 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2308 #endif
2309 /* Default to ARM6. */
2310 if (!arm_selected_cpu->name)
2311 arm_selected_cpu = &all_cores[arm6];
2314 sel = arm_selected_cpu;
2315 insn_flags = sel->flags;
2317 /* Now check to see if the user has specified some command line
2318 switch that requires certain abilities from the cpu.
2319 sought = 0;
2321 if (TARGET_INTERWORK || TARGET_THUMB)
2323 sought |= (FL_THUMB | FL_MODE32);
2325 /* There are no ARM processors that support both APCS-26 and
2326 interworking. Therefore we force FL_MODE26 to be removed
2327 from insn_flags here (if it was set), so that the search
2328 below will always be able to find a compatible processor. */
2329 insn_flags &= ~FL_MODE26;
2332 if (sought != 0 && ((sought & insn_flags) != sought))
2334 /* Try to locate a CPU type that supports all of the abilities
2335 of the default CPU, plus the extra abilities requested by
2336 the user. */
2337 for (sel = all_cores; sel->name != NULL; sel++)
2338 if ((sel->flags & sought) == (sought | insn_flags))
2339 break;
2341 if (sel->name == NULL)
2343 unsigned current_bit_count = 0;
2344 const struct processors * best_fit = NULL;
2346 /* Ideally we would like to issue an error message here
2347 saying that it was not possible to find a CPU compatible
2348 with the default CPU, but which also supports the command
2349 line options specified by the programmer, and so they
2350 ought to use the -mcpu=<name> command line option to
2351 override the default CPU type.
2353 If we cannot find a cpu that has both the
2354 characteristics of the default cpu and the given
2355 command line options we scan the array again looking
2356 for a best match. */
2357 for (sel = all_cores; sel->name != NULL; sel++)
2358 if ((sel->flags & sought) == sought)
2360 unsigned count;
2362 count = bit_count (sel->flags & insn_flags);
2364 if (count >= current_bit_count)
2366 best_fit = sel;
2367 current_bit_count = count;
2371 gcc_assert (best_fit);
2372 sel = best_fit;
2375 arm_selected_cpu = sel;
2379 gcc_assert (arm_selected_cpu);
2380 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2381 if (!arm_selected_tune)
2382 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2384 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2385 insn_flags = arm_selected_cpu->flags;
2386 arm_base_arch = arm_selected_cpu->base_arch;
2388 arm_tune = arm_selected_tune->core;
2389 tune_flags = arm_selected_tune->flags;
2390 current_tune = arm_selected_tune->tune;
2392 /* Make sure that the processor choice does not conflict with any of the
2393 other command line choices. */
2394 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2395 error ("target CPU does not support ARM mode");
2397 /* BPABI targets use linker tricks to allow interworking on cores
2398 without thumb support. */
2399 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2401 warning (0, "target CPU does not support interworking" );
2402 target_flags &= ~MASK_INTERWORK;
2405 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2407 warning (0, "target CPU does not support THUMB instructions");
2408 target_flags &= ~MASK_THUMB;
2411 if (TARGET_APCS_FRAME && TARGET_THUMB)
2413 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2414 target_flags &= ~MASK_APCS_FRAME;
2417 /* Callee super interworking implies thumb interworking. Adding
2418 this to the flags here simplifies the logic elsewhere. */
2419 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2420 target_flags |= MASK_INTERWORK;
2422 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2423 from here where no function is being compiled currently. */
2424 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2425 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2427 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2428 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2430 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2432 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2433 target_flags |= MASK_APCS_FRAME;
2436 if (TARGET_POKE_FUNCTION_NAME)
2437 target_flags |= MASK_APCS_FRAME;
2439 if (TARGET_APCS_REENT && flag_pic)
2440 error ("-fpic and -mapcs-reent are incompatible");
2442 if (TARGET_APCS_REENT)
2443 warning (0, "APCS reentrant code not supported. Ignored");
2445 /* If this target is normally configured to use APCS frames, warn if they
2446 are turned off and debugging is turned on. */
2447 if (TARGET_ARM
2448 && write_symbols != NO_DEBUG
2449 && !TARGET_APCS_FRAME
2450 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2451 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2453 if (TARGET_APCS_FLOAT)
2454 warning (0, "passing floating point arguments in fp regs not yet supported");
2456 if (TARGET_LITTLE_WORDS)
2457 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2458 "will be removed in a future release");
2460 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2461 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2462 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2463 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2464 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2465 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2466 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2467 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2468 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2469 arm_arch6m = arm_arch6 && !arm_arch_notm;
2470 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2471 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2472 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2473 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2474 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2476 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2477 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2478 thumb_code = TARGET_ARM == 0;
2479 thumb1_code = TARGET_THUMB1 != 0;
2480 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2481 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2482 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2483 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2484 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2485 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2486 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2487 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2488 if (arm_restrict_it == 2)
2489 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2491 if (!TARGET_THUMB2)
2492 arm_restrict_it = 0;
2494 /* If we are not using the default (ARM mode) section anchor offset
2495 ranges, then set the correct ranges now. */
2496 if (TARGET_THUMB1)
2498 /* Thumb-1 LDR instructions cannot have negative offsets.
2499 Permissible positive offset ranges are 5-bit (for byte loads),
2500 6-bit (for halfword loads), or 7-bit (for word loads).
2501 Empirical results suggest a 7-bit anchor range gives the best
2502 overall code size. */
2503 targetm.min_anchor_offset = 0;
2504 targetm.max_anchor_offset = 127;
2506 else if (TARGET_THUMB2)
2508 /* The minimum is set such that the total size of the block
2509 for a particular anchor is 248 + 1 + 4095 bytes, which is
2510 divisible by eight, ensuring natural spacing of anchors. */
2511 targetm.min_anchor_offset = -248;
2512 targetm.max_anchor_offset = 4095;
2515 /* V5 code we generate is completely interworking capable, so we turn off
2516 TARGET_INTERWORK here to avoid many tests later on. */
2518 /* XXX However, we must pass the right pre-processor defines to CPP
2519 or GLD can get confused. This is a hack. */
2520 if (TARGET_INTERWORK)
2521 arm_cpp_interwork = 1;
2523 if (arm_arch5)
2524 target_flags &= ~MASK_INTERWORK;
2526 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2527 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2529 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2530 error ("iwmmxt abi requires an iwmmxt capable cpu");
2532 if (!global_options_set.x_arm_fpu_index)
2534 const char *target_fpu_name;
2535 bool ok;
2537 #ifdef FPUTYPE_DEFAULT
2538 target_fpu_name = FPUTYPE_DEFAULT;
2539 #else
2540 target_fpu_name = "vfp";
2541 #endif
2543 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2544 CL_TARGET);
2545 gcc_assert (ok);
2548 arm_fpu_desc = &all_fpus[arm_fpu_index];
2550 switch (arm_fpu_desc->model)
2552 case ARM_FP_MODEL_VFP:
2553 arm_fpu_attr = FPU_VFP;
2554 break;
2556 default:
2557 gcc_unreachable();
2560 if (TARGET_AAPCS_BASED)
2562 if (TARGET_CALLER_INTERWORKING)
2563 error ("AAPCS does not support -mcaller-super-interworking");
2564 else
2565 if (TARGET_CALLEE_INTERWORKING)
2566 error ("AAPCS does not support -mcallee-super-interworking");
2569 /* iWMMXt and NEON are incompatible. */
2570 if (TARGET_IWMMXT && TARGET_NEON)
2571 error ("iWMMXt and NEON are incompatible");
2573 /* iWMMXt unsupported under Thumb mode. */
2574 if (TARGET_THUMB && TARGET_IWMMXT)
2575 error ("iWMMXt unsupported under Thumb mode");
2577 /* __fp16 support currently assumes the core has ldrh. */
2578 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2579 sorry ("__fp16 and no ldrh");
2581 /* If soft-float is specified then don't use FPU. */
2582 if (TARGET_SOFT_FLOAT)
2583 arm_fpu_attr = FPU_NONE;
2585 if (TARGET_AAPCS_BASED)
2587 if (arm_abi == ARM_ABI_IWMMXT)
2588 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2589 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2590 && TARGET_HARD_FLOAT
2591 && TARGET_VFP)
2592 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2593 else
2594 arm_pcs_default = ARM_PCS_AAPCS;
2596 else
2598 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2599 sorry ("-mfloat-abi=hard and VFP");
2601 if (arm_abi == ARM_ABI_APCS)
2602 arm_pcs_default = ARM_PCS_APCS;
2603 else
2604 arm_pcs_default = ARM_PCS_ATPCS;
2607 /* For arm2/3 there is no need to do any scheduling if we are doing
2608 software floating-point. */
2609 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2610 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2612 /* Use the cp15 method if it is available. */
2613 if (target_thread_pointer == TP_AUTO)
2615 if (arm_arch6k && !TARGET_THUMB1)
2616 target_thread_pointer = TP_CP15;
2617 else
2618 target_thread_pointer = TP_SOFT;
2621 if (TARGET_HARD_TP && TARGET_THUMB1)
2622 error ("can not use -mtp=cp15 with 16-bit Thumb");
2624 /* Override the default structure alignment for AAPCS ABI. */
2625 if (!global_options_set.x_arm_structure_size_boundary)
2627 if (TARGET_AAPCS_BASED)
2628 arm_structure_size_boundary = 8;
2630 else
2632 if (arm_structure_size_boundary != 8
2633 && arm_structure_size_boundary != 32
2634 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2636 if (ARM_DOUBLEWORD_ALIGN)
2637 warning (0,
2638 "structure size boundary can only be set to 8, 32 or 64");
2639 else
2640 warning (0, "structure size boundary can only be set to 8 or 32");
2641 arm_structure_size_boundary
2642 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2646 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2648 error ("RTP PIC is incompatible with Thumb");
2649 flag_pic = 0;
2652 /* If stack checking is disabled, we can use r10 as the PIC register,
2653 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2654 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2656 if (TARGET_VXWORKS_RTP)
2657 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2658 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2661 if (flag_pic && TARGET_VXWORKS_RTP)
2662 arm_pic_register = 9;
2664 if (arm_pic_register_string != NULL)
2666 int pic_register = decode_reg_name (arm_pic_register_string);
2668 if (!flag_pic)
2669 warning (0, "-mpic-register= is useless without -fpic");
2671 /* Prevent the user from choosing an obviously stupid PIC register. */
2672 else if (pic_register < 0 || call_used_regs[pic_register]
2673 || pic_register == HARD_FRAME_POINTER_REGNUM
2674 || pic_register == STACK_POINTER_REGNUM
2675 || pic_register >= PC_REGNUM
2676 || (TARGET_VXWORKS_RTP
2677 && (unsigned int) pic_register != arm_pic_register))
2678 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2679 else
2680 arm_pic_register = pic_register;
2683 if (TARGET_VXWORKS_RTP
2684 && !global_options_set.x_arm_pic_data_is_text_relative)
2685 arm_pic_data_is_text_relative = 0;
2687 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2688 if (fix_cm3_ldrd == 2)
2690 if (arm_selected_cpu->core == cortexm3)
2691 fix_cm3_ldrd = 1;
2692 else
2693 fix_cm3_ldrd = 0;
2696 /* Enable -munaligned-access by default for
2697 - all ARMv6 architecture-based processors
2698 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2699 - ARMv8 architecture-based processors.
2701 Disable -munaligned-access by default for
2702 - all pre-ARMv6 architecture-based processors
2703 - ARMv6-M architecture-based processors. */
2705 if (unaligned_access == 2)
2707 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2708 unaligned_access = 1;
2709 else
2710 unaligned_access = 0;
2712 else if (unaligned_access == 1
2713 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2715 warning (0, "target CPU does not support unaligned accesses");
2716 unaligned_access = 0;
2719 if (TARGET_THUMB1 && flag_schedule_insns)
2721 /* Don't warn since it's on by default in -O2. */
2722 flag_schedule_insns = 0;
2725 if (optimize_size)
2727 /* If optimizing for size, bump the number of instructions that we
2728 are prepared to conditionally execute (even on a StrongARM). */
2729 max_insns_skipped = 6;
2731 else
2732 max_insns_skipped = current_tune->max_insns_skipped;
2734 /* Hot/Cold partitioning is not currently supported, since we can't
2735 handle literal pool placement in that case. */
2736 if (flag_reorder_blocks_and_partition)
2738 inform (input_location,
2739 "-freorder-blocks-and-partition not supported on this architecture");
2740 flag_reorder_blocks_and_partition = 0;
2741 flag_reorder_blocks = 1;
2744 if (flag_pic)
2745 /* Hoisting PIC address calculations more aggressively provides a small,
2746 but measurable, size reduction for PIC code. Therefore, we decrease
2747 the bar for unrestricted expression hoisting to the cost of PIC address
2748 calculation, which is 2 instructions. */
2749 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2750 global_options.x_param_values,
2751 global_options_set.x_param_values);
2753 /* ARM EABI defaults to strict volatile bitfields. */
2754 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2755 && abi_version_at_least(2))
2756 flag_strict_volatile_bitfields = 1;
2758 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we have deemed
2759 it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2760 if (flag_prefetch_loop_arrays < 0
2761 && HAVE_prefetch
2762 && optimize >= 3
2763 && current_tune->num_prefetch_slots > 0)
2764 flag_prefetch_loop_arrays = 1;
2766 /* Set up parameters to be used in prefetching algorithm. Do not override the
2767 defaults unless we are tuning for a core we have researched values for. */
2768 if (current_tune->num_prefetch_slots > 0)
2769 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2770 current_tune->num_prefetch_slots,
2771 global_options.x_param_values,
2772 global_options_set.x_param_values);
2773 if (current_tune->l1_cache_line_size >= 0)
2774 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2775 current_tune->l1_cache_line_size,
2776 global_options.x_param_values,
2777 global_options_set.x_param_values);
2778 if (current_tune->l1_cache_size >= 0)
2779 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2780 current_tune->l1_cache_size,
2781 global_options.x_param_values,
2782 global_options_set.x_param_values);
2784 /* Use Neon rather than core registers to perform 64-bit
2785 operations. */
2786 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2787 if (use_neon_for_64bits == 1)
2788 prefer_neon_for_64bits = true;
2790 /* Use the alternative scheduling-pressure algorithm by default. */
2791 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2792 global_options.x_param_values,
2793 global_options_set.x_param_values);
2795 /* Disable shrink-wrap when optimizing function for size, since it tends to
2796 generate additional returns. */
2797 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2798 flag_shrink_wrap = false;
2799 /* TBD: Dwarf info for apcs frame is not handled yet. */
2800 if (TARGET_APCS_FRAME)
2801 flag_shrink_wrap = false;
2803 /* We only support -mslow-flash-data on armv7-m targets. */
2804 if (target_slow_flash_data
2805 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2806 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
2807 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2809 /* Currently, for slow flash data, we just disable literal pools. */
2810 if (target_slow_flash_data)
2811 arm_disable_literal_pool = true;
2813 /* Register global variables with the garbage collector. */
2814 arm_add_gc_roots ();
2817 static void
2818 arm_add_gc_roots (void)
2820 gcc_obstack_init(&minipool_obstack);
2821 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2824 /* A table of known ARM exception types.
2825 For use with the interrupt function attribute. */
2827 typedef struct
2829 const char *const arg;
2830 const unsigned long return_value;
2832 isr_attribute_arg;
2834 static const isr_attribute_arg isr_attribute_args [] =
2836 { "IRQ", ARM_FT_ISR },
2837 { "irq", ARM_FT_ISR },
2838 { "FIQ", ARM_FT_FIQ },
2839 { "fiq", ARM_FT_FIQ },
2840 { "ABORT", ARM_FT_ISR },
2841 { "abort", ARM_FT_ISR },
2842 { "ABORT", ARM_FT_ISR },
2843 { "abort", ARM_FT_ISR },
2844 { "UNDEF", ARM_FT_EXCEPTION },
2845 { "undef", ARM_FT_EXCEPTION },
2846 { "SWI", ARM_FT_EXCEPTION },
2847 { "swi", ARM_FT_EXCEPTION },
2848 { NULL, ARM_FT_NORMAL }
2851 /* Returns the (interrupt) function type of the current
2852 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2854 static unsigned long
2855 arm_isr_value (tree argument)
2857 const isr_attribute_arg * ptr;
2858 const char * arg;
2860 if (!arm_arch_notm)
2861 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2863 /* No argument - default to IRQ. */
2864 if (argument == NULL_TREE)
2865 return ARM_FT_ISR;
2867 /* Get the value of the argument. */
2868 if (TREE_VALUE (argument) == NULL_TREE
2869 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2870 return ARM_FT_UNKNOWN;
2872 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2874 /* Check it against the list of known arguments. */
2875 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2876 if (streq (arg, ptr->arg))
2877 return ptr->return_value;
2879 /* An unrecognized interrupt type. */
2880 return ARM_FT_UNKNOWN;
2883 /* Computes the type of the current function. */
2885 static unsigned long
2886 arm_compute_func_type (void)
2888 unsigned long type = ARM_FT_UNKNOWN;
2889 tree a;
2890 tree attr;
2892 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2894 /* Decide if the current function is volatile. Such functions
2895 never return, and many memory cycles can be saved by not storing
2896 register values that will never be needed again. This optimization
2897 was added to speed up context switching in a kernel application. */
2898 if (optimize > 0
2899 && (TREE_NOTHROW (current_function_decl)
2900 || !(flag_unwind_tables
2901 || (flag_exceptions
2902 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2903 && TREE_THIS_VOLATILE (current_function_decl))
2904 type |= ARM_FT_VOLATILE;
2906 if (cfun->static_chain_decl != NULL)
2907 type |= ARM_FT_NESTED;
2909 attr = DECL_ATTRIBUTES (current_function_decl);
2911 a = lookup_attribute ("naked", attr);
2912 if (a != NULL_TREE)
2913 type |= ARM_FT_NAKED;
2915 a = lookup_attribute ("isr", attr);
2916 if (a == NULL_TREE)
2917 a = lookup_attribute ("interrupt", attr);
2919 if (a == NULL_TREE)
2920 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2921 else
2922 type |= arm_isr_value (TREE_VALUE (a));
2924 return type;
2927 /* Returns the type of the current function. */
2929 unsigned long
2930 arm_current_func_type (void)
2932 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2933 cfun->machine->func_type = arm_compute_func_type ();
2935 return cfun->machine->func_type;
2938 bool
2939 arm_allocate_stack_slots_for_args (void)
2941 /* Naked functions should not allocate stack slots for arguments. */
2942 return !IS_NAKED (arm_current_func_type ());
2945 static bool
2946 arm_warn_func_return (tree decl)
2948 /* Naked functions are implemented entirely in assembly, including the
2949 return sequence, so suppress warnings about this. */
2950 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2954 /* Output assembler code for a block containing the constant parts
2955 of a trampoline, leaving space for the variable parts.
2957 On the ARM, (if r8 is the static chain regnum, and remembering that
2958 referencing pc adds an offset of 8) the trampoline looks like:
2959 ldr r8, [pc, #0]
2960 ldr pc, [pc]
2961 .word static chain value
2962 .word function's address
2963 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2965 static void
2966 arm_asm_trampoline_template (FILE *f)
2968 if (TARGET_ARM)
2970 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2971 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2973 else if (TARGET_THUMB2)
2975 /* The Thumb-2 trampoline is similar to the arm implementation.
2976 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2977 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2978 STATIC_CHAIN_REGNUM, PC_REGNUM);
2979 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2981 else
2983 ASM_OUTPUT_ALIGN (f, 2);
2984 fprintf (f, "\t.code\t16\n");
2985 fprintf (f, ".Ltrampoline_start:\n");
2986 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2987 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2988 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2989 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2990 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2991 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2993 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2994 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2997 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2999 static void
3000 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3002 rtx fnaddr, mem, a_tramp;
3004 emit_block_move (m_tramp, assemble_trampoline_template (),
3005 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3007 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3008 emit_move_insn (mem, chain_value);
3010 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3011 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3012 emit_move_insn (mem, fnaddr);
3014 a_tramp = XEXP (m_tramp, 0);
3015 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3016 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3017 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
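/* The initialized trampoline therefore consists of the instruction
   template followed by the static chain value at offset 8 and the target
   function's address at offset 12 (offsets 12 and 16 for the longer
   16-bit Thumb template).  */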
3020 /* Thumb trampolines should be entered in thumb mode, so set
3021 the bottom bit of the address. */
3023 static rtx
3024 arm_trampoline_adjust_address (rtx addr)
3026 if (TARGET_THUMB)
3027 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3028 NULL, 0, OPTAB_LIB_WIDEN);
3029 return addr;
3032 /* Return 1 if it is possible to return using a single instruction.
3033 If SIBLING is non-null, this is a test for a return before a sibling
3034 call. SIBLING is the call insn, so we can examine its register usage. */
3037 use_return_insn (int iscond, rtx sibling)
3039 int regno;
3040 unsigned int func_type;
3041 unsigned long saved_int_regs;
3042 unsigned HOST_WIDE_INT stack_adjust;
3043 arm_stack_offsets *offsets;
3045 /* Never use a return instruction before reload has run. */
3046 if (!reload_completed)
3047 return 0;
3049 func_type = arm_current_func_type ();
3051 /* Naked, volatile and stack alignment functions need special
3052 consideration. */
3053 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3054 return 0;
3056 /* So do interrupt functions that use the frame pointer and Thumb
3057 interrupt functions. */
3058 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3059 return 0;
3061 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3062 && !optimize_function_for_size_p (cfun))
3063 return 0;
3065 offsets = arm_get_frame_offsets ();
3066 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3068 /* As do variadic functions. */
3069 if (crtl->args.pretend_args_size
3070 || cfun->machine->uses_anonymous_args
3071 /* Or if the function calls __builtin_eh_return () */
3072 || crtl->calls_eh_return
3073 /* Or if the function calls alloca */
3074 || cfun->calls_alloca
3075 /* Or if there is a stack adjustment. However, if the stack pointer
3076 is saved on the stack, we can use a pre-incrementing stack load. */
3077 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3078 && stack_adjust == 4)))
3079 return 0;
3081 saved_int_regs = offsets->saved_regs_mask;
3083 /* Unfortunately, the insn
3085 ldmib sp, {..., sp, ...}
3087 triggers a bug on most SA-110 based devices, such that the stack
3088 pointer won't be correctly restored if the instruction takes a
3089 page fault. We work around this problem by popping r3 along with
3090 the other registers, since that is never slower than executing
3091 another instruction.
3093 We test for !arm_arch5 here, because code for any architecture
3094 less than this could potentially be run on one of the buggy
3095 chips. */
3096 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3098 /* Validate that r3 is a call-clobbered register (always true in
3099 the default abi) ... */
3100 if (!call_used_regs[3])
3101 return 0;
3103 /* ... that it isn't being used for a return value ... */
3104 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3105 return 0;
3107 /* ... or for a tail-call argument ... */
3108 if (sibling)
3110 gcc_assert (CALL_P (sibling));
3112 if (find_regno_fusage (sibling, USE, 3))
3113 return 0;
3116 /* ... and that there are no call-saved registers in r0-r2
3117 (always true in the default ABI). */
3118 if (saved_int_regs & 0x7)
3119 return 0;
3122 /* Can't be done if interworking with Thumb, and any registers have been
3123 stacked. */
3124 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3125 return 0;
3127 /* On StrongARM, conditional returns are expensive if they aren't
3128 taken and multiple registers have been stacked. */
3129 if (iscond && arm_tune_strongarm)
3131 /* Conditional return when just the LR is stored is a simple
3132 conditional-load instruction, that's not expensive. */
3133 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3134 return 0;
3136 if (flag_pic
3137 && arm_pic_register != INVALID_REGNUM
3138 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3139 return 0;
3142 /* If there are saved registers but the LR isn't saved, then we need
3143 two instructions for the return. */
3144 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3145 return 0;
3147 /* Can't be done if any of the VFP regs are pushed,
3148 since this also requires an insn. */
3149 if (TARGET_HARD_FLOAT && TARGET_VFP)
3150 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3151 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3152 return 0;
3154 if (TARGET_REALLY_IWMMXT)
3155 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3156 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3157 return 0;
3159 return 1;
3162 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3163 shrink-wrapping if possible. This is the case if we need to emit a
3164 prologue, which we can test by looking at the offsets. */
3165 bool
3166 use_simple_return_p (void)
3168 arm_stack_offsets *offsets;
3170 offsets = arm_get_frame_offsets ();
3171 return offsets->outgoing_args != 0;
3174 /* Return TRUE if int I is a valid immediate ARM constant. */
3177 const_ok_for_arm (HOST_WIDE_INT i)
3179 int lowbit;
3181 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3182 be all zero, or all one. */
3183 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3184 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3185 != ((~(unsigned HOST_WIDE_INT) 0)
3186 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3187 return FALSE;
3189 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3191 /* Fast return for 0 and small values. We must do this for zero, since
3192 the code below can't handle that one case. */
3193 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3194 return TRUE;
3196 /* Get the number of trailing zeros. */
3197 lowbit = ffs((int) i) - 1;
3199 /* Only even shifts are allowed in ARM mode so round down to the
3200 nearest even number. */
3201 if (TARGET_ARM)
3202 lowbit &= ~1;
3204 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3205 return TRUE;
3207 if (TARGET_ARM)
3209 /* Allow rotated constants in ARM mode. */
3210 if (lowbit <= 4
3211 && ((i & ~0xc000003f) == 0
3212 || (i & ~0xf000000f) == 0
3213 || (i & ~0xfc000003) == 0))
3214 return TRUE;
3216 else
3218 HOST_WIDE_INT v;
3220 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3221 v = i & 0xff;
3222 v |= v << 16;
3223 if (i == v || i == (v | (v << 8)))
3224 return TRUE;
3226 /* Allow repeated pattern 0xXY00XY00. */
3227 v = i & 0xff00;
3228 v |= v << 16;
3229 if (i == v)
3230 return TRUE;
3233 return FALSE;
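/* For example, 0x000000ff, 0x00ff0000 and 0xf000000f (0xff rotated right
   by 4) are all valid ARM-mode immediates, while 0x00000101 is not, since
   its set bits do not fit in any single 8-bit window.  In Thumb-2 mode the
   replicated patterns 0x00120012 and 0x12121212 are additionally valid.  */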
3236 /* Return true if I is a valid constant for the operation CODE. */
3238 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3240 if (const_ok_for_arm (i))
3241 return 1;
3243 switch (code)
3245 case SET:
3246 /* See if we can use movw. */
3247 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3248 return 1;
3249 else
3250 /* Otherwise, try mvn. */
3251 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3253 case PLUS:
3254 /* See if we can use addw or subw. */
3255 if (TARGET_THUMB2
3256 && ((i & 0xfffff000) == 0
3257 || ((-i) & 0xfffff000) == 0))
3258 return 1;
3259 /* else fall through. */
3261 case COMPARE:
3262 case EQ:
3263 case NE:
3264 case GT:
3265 case LE:
3266 case LT:
3267 case GE:
3268 case GEU:
3269 case LTU:
3270 case GTU:
3271 case LEU:
3272 case UNORDERED:
3273 case ORDERED:
3274 case UNEQ:
3275 case UNGE:
3276 case UNLT:
3277 case UNGT:
3278 case UNLE:
3279 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3281 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3282 case XOR:
3283 return 0;
3285 case IOR:
3286 if (TARGET_THUMB2)
3287 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3288 return 0;
3290 case AND:
3291 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3293 default:
3294 gcc_unreachable ();
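/* For example, (PLUS, -5) is accepted because the negated value 5 is a
   valid immediate (the addition can be emitted as a subtraction), and
   (AND, 0xffffff00) is accepted because its complement 0xff is valid
   (the AND can be emitted as a BIC).  */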
3298 /* Return true if I is a valid di mode constant for the operation CODE. */
3300 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3302 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3303 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3304 rtx hi = GEN_INT (hi_val);
3305 rtx lo = GEN_INT (lo_val);
3307 if (TARGET_THUMB1)
3308 return 0;
3310 switch (code)
3312 case AND:
3313 case IOR:
3314 case XOR:
3315 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3316 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3317 case PLUS:
3318 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3320 default:
3321 return 0;
3325 /* Emit a sequence of insns to handle a large constant.
3326 CODE is the code of the operation required; it can be any of SET, PLUS,
3327 IOR, AND, XOR, MINUS;
3328 MODE is the mode in which the operation is being performed;
3329 VAL is the integer to operate on;
3330 SOURCE is the other operand (a register, or a null-pointer for SET);
3331 SUBTARGETS means it is safe to create scratch registers if that will
3332 either produce a simpler sequence, or we will want to cse the values.
3333 Return value is the number of insns emitted. */
3335 /* ??? Tweak this for thumb2. */
3337 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3338 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3340 rtx cond;
3342 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3343 cond = COND_EXEC_TEST (PATTERN (insn));
3344 else
3345 cond = NULL_RTX;
3347 if (subtargets || code == SET
3348 || (REG_P (target) && REG_P (source)
3349 && REGNO (target) != REGNO (source)))
3351 /* After arm_reorg has been called, we can't fix up expensive
3352 constants by pushing them into memory so we must synthesize
3353 them in-line, regardless of the cost. This is only likely to
3354 be more costly on chips that have load delay slots and we are
3355 compiling without running the scheduler (so no splitting
3356 occurred before the final instruction emission).
3358 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3360 if (!after_arm_reorg
3361 && !cond
3362 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3363 1, 0)
3364 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3365 + (code != SET))))
3367 if (code == SET)
3369 /* Currently SET is the only monadic value for CODE; all
3370 the rest are dyadic. */
3371 if (TARGET_USE_MOVT)
3372 arm_emit_movpair (target, GEN_INT (val));
3373 else
3374 emit_set_insn (target, GEN_INT (val));
3376 return 1;
3378 else
3380 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3382 if (TARGET_USE_MOVT)
3383 arm_emit_movpair (temp, GEN_INT (val));
3384 else
3385 emit_set_insn (temp, GEN_INT (val));
3387 /* For MINUS, the value is subtracted from, since we never
3388 have subtraction of a constant. */
3389 if (code == MINUS)
3390 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3391 else
3392 emit_set_insn (target,
3393 gen_rtx_fmt_ee (code, mode, source, temp));
3394 return 2;
3399 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3403 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3404 ARM/THUMB2 immediates, and add up to VAL.
3405 The function return value gives the number of insns required. */
3406 static int
3407 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3408 struct four_ints *return_sequence)
3410 int best_consecutive_zeros = 0;
3411 int i;
3412 int best_start = 0;
3413 int insns1, insns2;
3414 struct four_ints tmp_sequence;
3416 /* If we aren't targeting ARM, the best place to start is always at
3417 the bottom, otherwise look more closely. */
3418 if (TARGET_ARM)
3420 for (i = 0; i < 32; i += 2)
3422 int consecutive_zeros = 0;
3424 if (!(val & (3 << i)))
3426 while ((i < 32) && !(val & (3 << i)))
3428 consecutive_zeros += 2;
3429 i += 2;
3431 if (consecutive_zeros > best_consecutive_zeros)
3433 best_consecutive_zeros = consecutive_zeros;
3434 best_start = i - consecutive_zeros;
3436 i -= 2;
3441 /* So long as it won't require any more insns to do so, it's
3442 desirable to emit a small constant (in bits 0...9) in the last
3443 insn. This way there is more chance that it can be combined with
3444 a later addressing insn to form a pre-indexed load or store
3445 operation. Consider:
3447 *((volatile int *)0xe0000100) = 1;
3448 *((volatile int *)0xe0000110) = 2;
3450 We want this to wind up as:
3452 mov rA, #0xe0000000
3453 mov rB, #1
3454 str rB, [rA, #0x100]
3455 mov rB, #2
3456 str rB, [rA, #0x110]
3458 rather than having to synthesize both large constants from scratch.
3460 Therefore, we calculate how many insns would be required to emit
3461 the constant starting from `best_start', and also starting from
3462 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3463 yield a shorter sequence, we may as well use zero. */
3464 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3465 if (best_start != 0
3466 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3468 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3469 if (insns2 <= insns1)
3471 *return_sequence = tmp_sequence;
3472 insns1 = insns2;
3476 return insns1;
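/* Worked example (values chosen for illustration): in ARM state
   0x12345678 has no large aligned block of zeros, so it decomposes
   into four rotated 8-bit immediates, e.g.
       0x12000000, 0x00340000, 0x00005600, 0x00000078
   giving a four-insn mov/orr sequence; by contrast 0xFF0000FF needs
   only two (0xFF000000 and 0x000000FF).  */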
3479 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3480 static int
3481 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3482 struct four_ints *return_sequence, int i)
3484 int remainder = val & 0xffffffff;
3485 int insns = 0;
3487 /* Try and find a way of doing the job in either two or three
3488 instructions.
3490 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3491 location. We start at position I. This may be the MSB, or
3492 optimal_immediate_sequence may have positioned it at the largest block
3493 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3494 wrapping around to the top of the word when we drop off the bottom.
3495 In the worst case this code should produce no more than four insns.
3497 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3498 constants, shifted to any arbitrary location. We should always start
3499 at the MSB. */
3502 int end;
3503 unsigned int b1, b2, b3, b4;
3504 unsigned HOST_WIDE_INT result;
3505 int loc;
3507 gcc_assert (insns < 4);
3509 if (i <= 0)
3510 i += 32;
3512 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3513 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3515 loc = i;
3516 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3517 /* We can use addw/subw for the last 12 bits. */
3518 result = remainder;
3519 else
3521 /* Use an 8-bit shifted/rotated immediate. */
3522 end = i - 8;
3523 if (end < 0)
3524 end += 32;
3525 result = remainder & ((0x0ff << end)
3526 | ((i < end) ? (0xff >> (32 - end))
3527 : 0));
3528 i -= 8;
3531 else
3533 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3534 arbitrary shifts. */
3535 i -= TARGET_ARM ? 2 : 1;
3536 continue;
3539 /* Next, see if we can do a better job with a thumb2 replicated
3540 constant.
3542 We do it this way around to catch the cases like 0x01F001E0 where
3543 two 8-bit immediates would work, but a replicated constant would
3544 make it worse.
3546 TODO: 16-bit constants that don't clear all the bits, but still win.
3547 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3548 if (TARGET_THUMB2)
3550 b1 = (remainder & 0xff000000) >> 24;
3551 b2 = (remainder & 0x00ff0000) >> 16;
3552 b3 = (remainder & 0x0000ff00) >> 8;
3553 b4 = remainder & 0xff;
3555 if (loc > 24)
3557 /* The 8-bit immediate already found clears b1 (and maybe b2),
3558 but must leave b3 and b4 alone. */
3560 /* First try to find a 32-bit replicated constant that clears
3561 almost everything. We can assume that we can't do it in one,
3562 or else we wouldn't be here. */
3563 unsigned int tmp = b1 & b2 & b3 & b4;
3564 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3565 + (tmp << 24);
3566 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3567 + (tmp == b3) + (tmp == b4);
3568 if (tmp
3569 && (matching_bytes >= 3
3570 || (matching_bytes == 2
3571 && const_ok_for_op (remainder & ~tmp2, code))))
3573 /* At least 3 of the bytes match, and the fourth has at
3574 least as many bits set, or two of the bytes match
3575 and it will only require one more insn to finish. */
3576 result = tmp2;
3577 i = tmp != b1 ? 32
3578 : tmp != b2 ? 24
3579 : tmp != b3 ? 16
3580 : 8;
3583 /* Second, try to find a 16-bit replicated constant that can
3584 leave three of the bytes clear. If b2 or b4 is already
3585 zero, then we can. If the 8-bit from above would not
3586 clear b2 anyway, then we still win. */
3587 else if (b1 == b3 && (!b2 || !b4
3588 || (remainder & 0x00ff0000 & ~result)))
3590 result = remainder & 0xff00ff00;
3591 i = 24;
3594 else if (loc > 16)
3596 /* The 8-bit immediate already found clears b2 (and maybe b3)
3597 and we don't get here unless b1 is already clear, but it will
3598 leave b4 unchanged. */
3600 /* If we can clear b2 and b4 at once, then we win, since the
3601 8-bits couldn't possibly reach that far. */
3602 if (b2 == b4)
3604 result = remainder & 0x00ff00ff;
3605 i = 16;
3610 return_sequence->i[insns++] = result;
3611 remainder &= ~result;
3613 if (code == SET || code == MINUS)
3614 code = PLUS;
3616 while (remainder);
3618 return insns;
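/* Example of the Thumb-2 replicated forms (illustrative): 0x00FF00FF
   is a single Thumb-2 modified immediate (0xFF replicated into both
   half-words), whereas in ARM state it needs two rotated immediates
   (0x000000FF and 0x00FF0000), so an ORR or BIC with this mask costs
   one insn on Thumb-2 but two on ARM.  */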
3621 /* Emit an instruction with the indicated PATTERN. If COND is
3622 non-NULL, conditionalize the execution of the instruction on COND
3623 being true. */
3625 static void
3626 emit_constant_insn (rtx cond, rtx pattern)
3628 if (cond)
3629 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3630 emit_insn (pattern);
3633 /* As above, but extra parameter GENERATE which, if clear, suppresses
3634 RTL generation. */
3636 static int
3637 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3638 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3639 int generate)
3641 int can_invert = 0;
3642 int can_negate = 0;
3643 int final_invert = 0;
3644 int i;
3645 int set_sign_bit_copies = 0;
3646 int clear_sign_bit_copies = 0;
3647 int clear_zero_bit_copies = 0;
3648 int set_zero_bit_copies = 0;
3649 int insns = 0, neg_insns, inv_insns;
3650 unsigned HOST_WIDE_INT temp1, temp2;
3651 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3652 struct four_ints *immediates;
3653 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3655 /* Find out which operations are safe for a given CODE. Also do a quick
3656 check for degenerate cases; these can occur when DImode operations
3657 are split. */
3658 switch (code)
3660 case SET:
3661 can_invert = 1;
3662 break;
3664 case PLUS:
3665 can_negate = 1;
3666 break;
3668 case IOR:
3669 if (remainder == 0xffffffff)
3671 if (generate)
3672 emit_constant_insn (cond,
3673 gen_rtx_SET (VOIDmode, target,
3674 GEN_INT (ARM_SIGN_EXTEND (val))));
3675 return 1;
3678 if (remainder == 0)
3680 if (reload_completed && rtx_equal_p (target, source))
3681 return 0;
3683 if (generate)
3684 emit_constant_insn (cond,
3685 gen_rtx_SET (VOIDmode, target, source));
3686 return 1;
3688 break;
3690 case AND:
3691 if (remainder == 0)
3693 if (generate)
3694 emit_constant_insn (cond,
3695 gen_rtx_SET (VOIDmode, target, const0_rtx));
3696 return 1;
3698 if (remainder == 0xffffffff)
3700 if (reload_completed && rtx_equal_p (target, source))
3701 return 0;
3702 if (generate)
3703 emit_constant_insn (cond,
3704 gen_rtx_SET (VOIDmode, target, source));
3705 return 1;
3707 can_invert = 1;
3708 break;
3710 case XOR:
3711 if (remainder == 0)
3713 if (reload_completed && rtx_equal_p (target, source))
3714 return 0;
3715 if (generate)
3716 emit_constant_insn (cond,
3717 gen_rtx_SET (VOIDmode, target, source));
3718 return 1;
3721 if (remainder == 0xffffffff)
3723 if (generate)
3724 emit_constant_insn (cond,
3725 gen_rtx_SET (VOIDmode, target,
3726 gen_rtx_NOT (mode, source)));
3727 return 1;
3729 final_invert = 1;
3730 break;
3732 case MINUS:
3733 /* We treat MINUS as (val - source), since (source - val) is always
3734 passed as (source + (-val)). */
3735 if (remainder == 0)
3737 if (generate)
3738 emit_constant_insn (cond,
3739 gen_rtx_SET (VOIDmode, target,
3740 gen_rtx_NEG (mode, source)));
3741 return 1;
3743 if (const_ok_for_arm (val))
3745 if (generate)
3746 emit_constant_insn (cond,
3747 gen_rtx_SET (VOIDmode, target,
3748 gen_rtx_MINUS (mode, GEN_INT (val),
3749 source)));
3750 return 1;
3753 break;
3755 default:
3756 gcc_unreachable ();
3759 /* If we can do it in one insn get out quickly. */
3760 if (const_ok_for_op (val, code))
3762 if (generate)
3763 emit_constant_insn (cond,
3764 gen_rtx_SET (VOIDmode, target,
3765 (source
3766 ? gen_rtx_fmt_ee (code, mode, source,
3767 GEN_INT (val))
3768 : GEN_INT (val))));
3769 return 1;
3772 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3773 insn. */
3774 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3775 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3777 if (generate)
3779 if (mode == SImode && i == 16)
3780 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3781 smaller insn. */
3782 emit_constant_insn (cond,
3783 gen_zero_extendhisi2
3784 (target, gen_lowpart (HImode, source)));
3785 else
3786 /* Extz only supports SImode, but we can coerce the operands
3787 into that mode. */
3788 emit_constant_insn (cond,
3789 gen_extzv_t2 (gen_lowpart (SImode, target),
3790 gen_lowpart (SImode, source),
3791 GEN_INT (i), const0_rtx));
3794 return 1;
3797 /* Calculate a few attributes that may be useful for specific
3798 optimizations. */
3799 /* Count number of leading zeros. */
3800 for (i = 31; i >= 0; i--)
3802 if ((remainder & (1 << i)) == 0)
3803 clear_sign_bit_copies++;
3804 else
3805 break;
3808 /* Count number of leading 1's. */
3809 for (i = 31; i >= 0; i--)
3811 if ((remainder & (1 << i)) != 0)
3812 set_sign_bit_copies++;
3813 else
3814 break;
3817 /* Count number of trailing zero's. */
3818 for (i = 0; i <= 31; i++)
3820 if ((remainder & (1 << i)) == 0)
3821 clear_zero_bit_copies++;
3822 else
3823 break;
3826 /* Count number of trailing 1's. */
3827 for (i = 0; i <= 31; i++)
3829 if ((remainder & (1 << i)) != 0)
3830 set_zero_bit_copies++;
3831 else
3832 break;
3835 switch (code)
3837 case SET:
3838 /* See if we can do this by sign_extending a constant that is known
3839 to be negative. This is a good way of doing it, since the shift
3840 may well merge into a subsequent insn. */
3841 if (set_sign_bit_copies > 1)
3843 if (const_ok_for_arm
3844 (temp1 = ARM_SIGN_EXTEND (remainder
3845 << (set_sign_bit_copies - 1))))
3847 if (generate)
3849 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3850 emit_constant_insn (cond,
3851 gen_rtx_SET (VOIDmode, new_src,
3852 GEN_INT (temp1)));
3853 emit_constant_insn (cond,
3854 gen_ashrsi3 (target, new_src,
3855 GEN_INT (set_sign_bit_copies - 1)));
3857 return 2;
3859 /* For an inverted constant, we will need to set the low bits;
3860 these will be shifted out of harm's way. */
3861 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3862 if (const_ok_for_arm (~temp1))
3864 if (generate)
3866 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3867 emit_constant_insn (cond,
3868 gen_rtx_SET (VOIDmode, new_src,
3869 GEN_INT (temp1)));
3870 emit_constant_insn (cond,
3871 gen_ashrsi3 (target, new_src,
3872 GEN_INT (set_sign_bit_copies - 1)));
3874 return 2;
3878 /* See if we can calculate the value as the difference between two
3879 valid immediates. */
3880 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3882 int topshift = clear_sign_bit_copies & ~1;
3884 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3885 & (0xff000000 >> topshift));
3887 /* If temp1 is zero, then that means the 9 most significant
3888 bits of remainder were 1 and we've caused it to overflow.
3889 When topshift is 0 we don't need to do anything since we
3890 can borrow from 'bit 32'. */
3891 if (temp1 == 0 && topshift != 0)
3892 temp1 = 0x80000000 >> (topshift - 1);
3894 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3896 if (const_ok_for_arm (temp2))
3898 if (generate)
3900 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3901 emit_constant_insn (cond,
3902 gen_rtx_SET (VOIDmode, new_src,
3903 GEN_INT (temp1)));
3904 emit_constant_insn (cond,
3905 gen_addsi3 (target, new_src,
3906 GEN_INT (-temp2)));
3909 return 2;
3913 /* See if we can generate this by setting the bottom (or the top)
3914 16 bits, and then shifting these into the other half of the
3915 word. We only look for the simplest cases, to do more would cost
3916 too much. Be careful, however, not to generate this when the
3917 alternative would take fewer insns. */
3918 if (val & 0xffff0000)
3920 temp1 = remainder & 0xffff0000;
3921 temp2 = remainder & 0x0000ffff;
3923 /* Overlaps outside this range are best done using other methods. */
3924 for (i = 9; i < 24; i++)
3926 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3927 && !const_ok_for_arm (temp2))
3929 rtx new_src = (subtargets
3930 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3931 : target);
3932 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3933 source, subtargets, generate);
3934 source = new_src;
3935 if (generate)
3936 emit_constant_insn
3937 (cond,
3938 gen_rtx_SET
3939 (VOIDmode, target,
3940 gen_rtx_IOR (mode,
3941 gen_rtx_ASHIFT (mode, source,
3942 GEN_INT (i)),
3943 source)));
3944 return insns + 1;
3948 /* Don't duplicate cases already considered. */
3949 for (i = 17; i < 24; i++)
3951 if (((temp1 | (temp1 >> i)) == remainder)
3952 && !const_ok_for_arm (temp1))
3954 rtx new_src = (subtargets
3955 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3956 : target);
3957 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3958 source, subtargets, generate);
3959 source = new_src;
3960 if (generate)
3961 emit_constant_insn
3962 (cond,
3963 gen_rtx_SET (VOIDmode, target,
3964 gen_rtx_IOR
3965 (mode,
3966 gen_rtx_LSHIFTRT (mode, source,
3967 GEN_INT (i)),
3968 source)));
3969 return insns + 1;
3973 break;
3975 case IOR:
3976 case XOR:
3977 /* If we have IOR or XOR, and the constant can be loaded in a
3978 single instruction, and we can find a temporary to put it in,
3979 then this can be done in two instructions instead of 3-4. */
3980 if (subtargets
3981 /* TARGET can't be NULL if SUBTARGETS is 0 */
3982 || (reload_completed && !reg_mentioned_p (target, source)))
3984 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3986 if (generate)
3988 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3990 emit_constant_insn (cond,
3991 gen_rtx_SET (VOIDmode, sub,
3992 GEN_INT (val)));
3993 emit_constant_insn (cond,
3994 gen_rtx_SET (VOIDmode, target,
3995 gen_rtx_fmt_ee (code, mode,
3996 source, sub)));
3998 return 2;
4002 if (code == XOR)
4003 break;
4005 /* Convert.
4006 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4007 and the remaining bits 0, e.g. 0xfff00000)
4008 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4010 This can be done in 2 instructions by using shifts with mov or mvn.
4011 e.g. for
4012 x = x | 0xfff00000;
4013 we generate.
4014 mvn r0, r0, asl #12
4015 mvn r0, r0, lsr #12 */
4016 if (set_sign_bit_copies > 8
4017 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4019 if (generate)
4021 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4022 rtx shift = GEN_INT (set_sign_bit_copies);
4024 emit_constant_insn
4025 (cond,
4026 gen_rtx_SET (VOIDmode, sub,
4027 gen_rtx_NOT (mode,
4028 gen_rtx_ASHIFT (mode,
4029 source,
4030 shift))));
4031 emit_constant_insn
4032 (cond,
4033 gen_rtx_SET (VOIDmode, target,
4034 gen_rtx_NOT (mode,
4035 gen_rtx_LSHIFTRT (mode, sub,
4036 shift))));
4038 return 2;
4041 /* Convert
4042 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4044 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4046 E.g. for r0 = r0 | 0xfff
4047 mvn r0, r0, lsr #12
4048 mvn r0, r0, asl #12
4051 if (set_zero_bit_copies > 8
4052 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4054 if (generate)
4056 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4057 rtx shift = GEN_INT (set_zero_bit_copies);
4059 emit_constant_insn
4060 (cond,
4061 gen_rtx_SET (VOIDmode, sub,
4062 gen_rtx_NOT (mode,
4063 gen_rtx_LSHIFTRT (mode,
4064 source,
4065 shift))));
4066 emit_constant_insn
4067 (cond,
4068 gen_rtx_SET (VOIDmode, target,
4069 gen_rtx_NOT (mode,
4070 gen_rtx_ASHIFT (mode, sub,
4071 shift))));
4073 return 2;
4076 /* This will never be reached for Thumb2 because orn is a valid
4077 instruction. This is for Thumb1 and the ARM 32 bit cases.
4079 x = y | constant (such that ~constant is a valid constant)
4080 Transform this to
4081 x = ~(~y & ~constant).
4083 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4085 if (generate)
4087 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4088 emit_constant_insn (cond,
4089 gen_rtx_SET (VOIDmode, sub,
4090 gen_rtx_NOT (mode, source)));
4091 source = sub;
4092 if (subtargets)
4093 sub = gen_reg_rtx (mode);
4094 emit_constant_insn (cond,
4095 gen_rtx_SET (VOIDmode, sub,
4096 gen_rtx_AND (mode, source,
4097 GEN_INT (temp1))));
4098 emit_constant_insn (cond,
4099 gen_rtx_SET (VOIDmode, target,
4100 gen_rtx_NOT (mode, sub)));
4102 return 3;
4104 break;
4106 case AND:
4107 /* See if two shifts will do 2 or more insns' worth of work. */
4108 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4110 HOST_WIDE_INT shift_mask = ((0xffffffff
4111 << (32 - clear_sign_bit_copies))
4112 & 0xffffffff);
4114 if ((remainder | shift_mask) != 0xffffffff)
4116 if (generate)
4118 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4119 insns = arm_gen_constant (AND, mode, cond,
4120 remainder | shift_mask,
4121 new_src, source, subtargets, 1);
4122 source = new_src;
4124 else
4126 rtx targ = subtargets ? NULL_RTX : target;
4127 insns = arm_gen_constant (AND, mode, cond,
4128 remainder | shift_mask,
4129 targ, source, subtargets, 0);
4133 if (generate)
4135 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4136 rtx shift = GEN_INT (clear_sign_bit_copies);
4138 emit_insn (gen_ashlsi3 (new_src, source, shift));
4139 emit_insn (gen_lshrsi3 (target, new_src, shift));
4142 return insns + 2;
4145 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4147 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4149 if ((remainder | shift_mask) != 0xffffffff)
4151 if (generate)
4153 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4155 insns = arm_gen_constant (AND, mode, cond,
4156 remainder | shift_mask,
4157 new_src, source, subtargets, 1);
4158 source = new_src;
4160 else
4162 rtx targ = subtargets ? NULL_RTX : target;
4164 insns = arm_gen_constant (AND, mode, cond,
4165 remainder | shift_mask,
4166 targ, source, subtargets, 0);
4170 if (generate)
4172 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4173 rtx shift = GEN_INT (clear_zero_bit_copies);
4175 emit_insn (gen_lshrsi3 (new_src, source, shift));
4176 emit_insn (gen_ashlsi3 (target, new_src, shift));
4179 return insns + 2;
4182 break;
4184 default:
4185 break;
4188 /* Calculate what the instruction sequences would be if we generated it
4189 normally, negated, or inverted. */
4190 if (code == AND)
4191 /* AND cannot be split into multiple insns, so invert and use BIC. */
4192 insns = 99;
4193 else
4194 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4196 if (can_negate)
4197 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4198 &neg_immediates);
4199 else
4200 neg_insns = 99;
4202 if (can_invert || final_invert)
4203 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4204 &inv_immediates);
4205 else
4206 inv_insns = 99;
4208 immediates = &pos_immediates;
4210 /* Is the negated immediate sequence more efficient? */
4211 if (neg_insns < insns && neg_insns <= inv_insns)
4213 insns = neg_insns;
4214 immediates = &neg_immediates;
4216 else
4217 can_negate = 0;
4219 /* Is the inverted immediate sequence more efficient?
4220 We must allow for an extra NOT instruction for XOR operations, although
4221 there is some chance that the final 'mvn' will get optimized later. */
4222 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4224 insns = inv_insns;
4225 immediates = &inv_immediates;
4227 else
4229 can_invert = 0;
4230 final_invert = 0;
4233 /* Now output the chosen sequence as instructions. */
4234 if (generate)
4236 for (i = 0; i < insns; i++)
4238 rtx new_src, temp1_rtx;
4240 temp1 = immediates->i[i];
4242 if (code == SET || code == MINUS)
4243 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4244 else if ((final_invert || i < (insns - 1)) && subtargets)
4245 new_src = gen_reg_rtx (mode);
4246 else
4247 new_src = target;
4249 if (can_invert)
4250 temp1 = ~temp1;
4251 else if (can_negate)
4252 temp1 = -temp1;
4254 temp1 = trunc_int_for_mode (temp1, mode);
4255 temp1_rtx = GEN_INT (temp1);
4257 if (code == SET)
4259 else if (code == MINUS)
4260 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4261 else
4262 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4264 emit_constant_insn (cond,
4265 gen_rtx_SET (VOIDmode, new_src,
4266 temp1_rtx));
4267 source = new_src;
4269 if (code == SET)
4271 can_negate = can_invert;
4272 can_invert = 0;
4273 code = PLUS;
4275 else if (code == MINUS)
4276 code = PLUS;
4280 if (final_invert)
4282 if (generate)
4283 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4284 gen_rtx_NOT (mode, source)));
4285 insns++;
4288 return insns;
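/* Illustration of the sequence selection above (example value only):
   for a SET of 0xFFF0FF0F the positive decomposition needs three or
   more rotated immediates, while the inverted value 0x000F00F0 splits
   into just two (0x000F0000 and 0x000000F0), so the inverted sequence
   is chosen and the constant is synthesized in two insns.  */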
4291 /* Canonicalize a comparison so that we are more likely to recognize it.
4292 This can be done for a few constant compares, where we can make the
4293 immediate value easier to load. */
4295 static void
4296 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4297 bool op0_preserve_value)
4299 enum machine_mode mode;
4300 unsigned HOST_WIDE_INT i, maxval;
4302 mode = GET_MODE (*op0);
4303 if (mode == VOIDmode)
4304 mode = GET_MODE (*op1);
4306 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4308 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4309 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4310 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4311 for GTU/LEU in Thumb mode. */
4312 if (mode == DImode)
4314 rtx tem;
4316 if (*code == GT || *code == LE
4317 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4319 /* Missing comparison. First try to use an available
4320 comparison. */
4321 if (CONST_INT_P (*op1))
4323 i = INTVAL (*op1);
4324 switch (*code)
4326 case GT:
4327 case LE:
4328 if (i != maxval
4329 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4331 *op1 = GEN_INT (i + 1);
4332 *code = *code == GT ? GE : LT;
4333 return;
4335 break;
4336 case GTU:
4337 case LEU:
4338 if (i != ~((unsigned HOST_WIDE_INT) 0)
4339 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4341 *op1 = GEN_INT (i + 1);
4342 *code = *code == GTU ? GEU : LTU;
4343 return;
4345 break;
4346 default:
4347 gcc_unreachable ();
4351 /* If that did not work, reverse the condition. */
4352 if (!op0_preserve_value)
4354 tem = *op0;
4355 *op0 = *op1;
4356 *op1 = tem;
4357 *code = (int)swap_condition ((enum rtx_code)*code);
4360 return;
4363 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4364 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4365 to facilitate possible combining with a cmp into 'ands'. */
4366 if (mode == SImode
4367 && GET_CODE (*op0) == ZERO_EXTEND
4368 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4369 && GET_MODE (XEXP (*op0, 0)) == QImode
4370 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4371 && subreg_lowpart_p (XEXP (*op0, 0))
4372 && *op1 == const0_rtx)
4373 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4374 GEN_INT (255));
4376 /* Comparisons smaller than DImode. Only adjust comparisons against
4377 an out-of-range constant. */
4378 if (!CONST_INT_P (*op1)
4379 || const_ok_for_arm (INTVAL (*op1))
4380 || const_ok_for_arm (- INTVAL (*op1)))
4381 return;
4383 i = INTVAL (*op1);
4385 switch (*code)
4387 case EQ:
4388 case NE:
4389 return;
4391 case GT:
4392 case LE:
4393 if (i != maxval
4394 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4396 *op1 = GEN_INT (i + 1);
4397 *code = *code == GT ? GE : LT;
4398 return;
4400 break;
4402 case GE:
4403 case LT:
4404 if (i != ~maxval
4405 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4407 *op1 = GEN_INT (i - 1);
4408 *code = *code == GE ? GT : LE;
4409 return;
4411 break;
4413 case GTU:
4414 case LEU:
4415 if (i != ~((unsigned HOST_WIDE_INT) 0)
4416 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4418 *op1 = GEN_INT (i + 1);
4419 *code = *code == GTU ? GEU : LTU;
4420 return;
4422 break;
4424 case GEU:
4425 case LTU:
4426 if (i != 0
4427 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4429 *op1 = GEN_INT (i - 1);
4430 *code = *code == GEU ? GTU : LEU;
4431 return;
4433 break;
4435 default:
4436 gcc_unreachable ();
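/* Example of the adjustment (illustrative): 511 (0x1ff) is not a
   valid ARM immediate but 512 is, so the signed comparison
       x > 511
   is rewritten as
       x >= 512
   allowing a single cmp against #512 instead of first loading 511
   into a register.  */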
4441 /* Define how to find the value returned by a function. */
4443 static rtx
4444 arm_function_value(const_tree type, const_tree func,
4445 bool outgoing ATTRIBUTE_UNUSED)
4447 enum machine_mode mode;
4448 int unsignedp ATTRIBUTE_UNUSED;
4449 rtx r ATTRIBUTE_UNUSED;
4451 mode = TYPE_MODE (type);
4453 if (TARGET_AAPCS_BASED)
4454 return aapcs_allocate_return_reg (mode, type, func);
4456 /* Promote integer types. */
4457 if (INTEGRAL_TYPE_P (type))
4458 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4460 /* Promotes small structs returned in a register to full-word size
4461 for big-endian AAPCS. */
4462 if (arm_return_in_msb (type))
4464 HOST_WIDE_INT size = int_size_in_bytes (type);
4465 if (size % UNITS_PER_WORD != 0)
4467 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4468 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4472 return arm_libcall_value_1 (mode);
4475 /* libcall hashtable helpers. */
4477 struct libcall_hasher : typed_noop_remove <rtx_def>
4479 typedef rtx_def value_type;
4480 typedef rtx_def compare_type;
4481 static inline hashval_t hash (const value_type *);
4482 static inline bool equal (const value_type *, const compare_type *);
4483 static inline void remove (value_type *);
4486 inline bool
4487 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4489 return rtx_equal_p (p1, p2);
4492 inline hashval_t
4493 libcall_hasher::hash (const value_type *p1)
4495 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4498 typedef hash_table <libcall_hasher> libcall_table_type;
4500 static void
4501 add_libcall (libcall_table_type htab, rtx libcall)
4503 *htab.find_slot (libcall, INSERT) = libcall;
4506 static bool
4507 arm_libcall_uses_aapcs_base (const_rtx libcall)
4509 static bool init_done = false;
4510 static libcall_table_type libcall_htab;
4512 if (!init_done)
4514 init_done = true;
4516 libcall_htab.create (31);
4517 add_libcall (libcall_htab,
4518 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4519 add_libcall (libcall_htab,
4520 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4521 add_libcall (libcall_htab,
4522 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4523 add_libcall (libcall_htab,
4524 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4526 add_libcall (libcall_htab,
4527 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4528 add_libcall (libcall_htab,
4529 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4530 add_libcall (libcall_htab,
4531 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4532 add_libcall (libcall_htab,
4533 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4535 add_libcall (libcall_htab,
4536 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4537 add_libcall (libcall_htab,
4538 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4539 add_libcall (libcall_htab,
4540 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4541 add_libcall (libcall_htab,
4542 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4543 add_libcall (libcall_htab,
4544 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4545 add_libcall (libcall_htab,
4546 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4547 add_libcall (libcall_htab,
4548 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4549 add_libcall (libcall_htab,
4550 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4552 /* Values from double-precision helper functions are returned in core
4553 registers if the selected core only supports single-precision
4554 arithmetic, even if we are using the hard-float ABI. The same is
4555 true for single-precision helpers, but we will never be using the
4556 hard-float ABI on a CPU which doesn't support single-precision
4557 operations in hardware. */
4558 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4559 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4560 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4561 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4562 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4563 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4564 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4565 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4566 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4567 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4568 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4569 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4570 SFmode));
4571 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4572 DFmode));
4575 return libcall && libcall_htab.find (libcall) != NULL;
4578 static rtx
4579 arm_libcall_value_1 (enum machine_mode mode)
4581 if (TARGET_AAPCS_BASED)
4582 return aapcs_libcall_value (mode);
4583 else if (TARGET_IWMMXT_ABI
4584 && arm_vector_mode_supported_p (mode))
4585 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4586 else
4587 return gen_rtx_REG (mode, ARG_REGISTER (1));
4590 /* Define how to find the value returned by a library function
4591 assuming the value has mode MODE. */
4593 static rtx
4594 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4596 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4597 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4599 /* The following libcalls return their result in integer registers,
4600 even though they return a floating point value. */
4601 if (arm_libcall_uses_aapcs_base (libcall))
4602 return gen_rtx_REG (mode, ARG_REGISTER(1));
4606 return arm_libcall_value_1 (mode);
4609 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4611 static bool
4612 arm_function_value_regno_p (const unsigned int regno)
4614 if (regno == ARG_REGISTER (1)
4615 || (TARGET_32BIT
4616 && TARGET_AAPCS_BASED
4617 && TARGET_VFP
4618 && TARGET_HARD_FLOAT
4619 && regno == FIRST_VFP_REGNUM)
4620 || (TARGET_IWMMXT_ABI
4621 && regno == FIRST_IWMMXT_REGNUM))
4622 return true;
4624 return false;
4627 /* Determine the amount of memory needed to store the possible return
4628 registers of an untyped call. */
4630 arm_apply_result_size (void)
4632 int size = 16;
4634 if (TARGET_32BIT)
4636 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4637 size += 32;
4638 if (TARGET_IWMMXT_ABI)
4639 size += 8;
4642 return size;
4645 /* Decide whether TYPE should be returned in memory (true)
4646 or in a register (false). FNTYPE is the type of the function making
4647 the call. */
4648 static bool
4649 arm_return_in_memory (const_tree type, const_tree fntype)
4651 HOST_WIDE_INT size;
4653 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4655 if (TARGET_AAPCS_BASED)
4657 /* Simple, non-aggregate types (i.e. not including vectors and
4658 complex) are always returned in a register (or registers).
4659 We don't care about which register here, so we can short-cut
4660 some of the detail. */
4661 if (!AGGREGATE_TYPE_P (type)
4662 && TREE_CODE (type) != VECTOR_TYPE
4663 && TREE_CODE (type) != COMPLEX_TYPE)
4664 return false;
4666 /* Any return value that is no larger than one word can be
4667 returned in r0. */
4668 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4669 return false;
4671 /* Check any available co-processors to see if they accept the
4672 type as a register candidate (VFP, for example, can return
4673 some aggregates in consecutive registers). These aren't
4674 available if the call is variadic. */
4675 if (aapcs_select_return_coproc (type, fntype) >= 0)
4676 return false;
4678 /* Vector values should be returned using ARM registers, not
4679 memory (unless they're over 16 bytes, which will break since
4680 we only have four call-clobbered registers to play with). */
4681 if (TREE_CODE (type) == VECTOR_TYPE)
4682 return (size < 0 || size > (4 * UNITS_PER_WORD));
4684 /* The rest go in memory. */
4685 return true;
4688 if (TREE_CODE (type) == VECTOR_TYPE)
4689 return (size < 0 || size > (4 * UNITS_PER_WORD));
4691 if (!AGGREGATE_TYPE_P (type) &&
4692 (TREE_CODE (type) != VECTOR_TYPE))
4693 /* All simple types are returned in registers. */
4694 return false;
4696 if (arm_abi != ARM_ABI_APCS)
4698 /* ATPCS and later return aggregate types in memory only if they are
4699 larger than a word (or are variable size). */
4700 return (size < 0 || size > UNITS_PER_WORD);
4703 /* For the arm-wince targets we choose to be compatible with Microsoft's
4704 ARM and Thumb compilers, which always return aggregates in memory. */
4705 #ifndef ARM_WINCE
4706 /* All structures/unions bigger than one word are returned in memory.
4707 Also catch the case where int_size_in_bytes returns -1. In this case
4708 the aggregate is either huge or of variable size, and in either case
4709 we will want to return it via memory and not in a register. */
4710 if (size < 0 || size > UNITS_PER_WORD)
4711 return true;
4713 if (TREE_CODE (type) == RECORD_TYPE)
4715 tree field;
4717 /* For a struct the APCS says that we only return in a register
4718 if the type is 'integer like' and every addressable element
4719 has an offset of zero. For practical purposes this means
4720 that the structure can have at most one non bit-field element
4721 and that this element must be the first one in the structure. */
4723 /* Find the first field, ignoring non FIELD_DECL things which will
4724 have been created by C++. */
4725 for (field = TYPE_FIELDS (type);
4726 field && TREE_CODE (field) != FIELD_DECL;
4727 field = DECL_CHAIN (field))
4728 continue;
4730 if (field == NULL)
4731 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4733 /* Check that the first field is valid for returning in a register. */
4735 /* ... Floats are not allowed */
4736 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4737 return true;
4739 /* ... Aggregates that are not themselves valid for returning in
4740 a register are not allowed. */
4741 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4742 return true;
4744 /* Now check the remaining fields, if any. Only bitfields are allowed,
4745 since they are not addressable. */
4746 for (field = DECL_CHAIN (field);
4747 field;
4748 field = DECL_CHAIN (field))
4750 if (TREE_CODE (field) != FIELD_DECL)
4751 continue;
4753 if (!DECL_BIT_FIELD_TYPE (field))
4754 return true;
4757 return false;
4760 if (TREE_CODE (type) == UNION_TYPE)
4762 tree field;
4764 /* Unions can be returned in registers if every element is
4765 integral, or can be returned in an integer register. */
4766 for (field = TYPE_FIELDS (type);
4767 field;
4768 field = DECL_CHAIN (field))
4770 if (TREE_CODE (field) != FIELD_DECL)
4771 continue;
4773 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4774 return true;
4776 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4777 return true;
4780 return false;
4782 #endif /* not ARM_WINCE */
4784 /* Return all other types in memory. */
4785 return true;
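/* Illustrative examples under AAPCS: struct { int a, b; } is eight
   bytes, larger than one word and not a co-processor candidate, so it
   is returned in memory; struct { float x, y; } on a hard-float
   (AAPCS-VFP) target is a homogeneous floating-point aggregate and is
   returned in VFP registers, so this function returns false for it; a
   plain int or any aggregate of at most four bytes is returned in r0.  */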
4788 const struct pcs_attribute_arg
4790 const char *arg;
4791 enum arm_pcs value;
4792 } pcs_attribute_args[] =
4794 {"aapcs", ARM_PCS_AAPCS},
4795 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4796 #if 0
4797 /* We could recognize these, but changes would be needed elsewhere
4798 * to implement them. */
4799 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4800 {"atpcs", ARM_PCS_ATPCS},
4801 {"apcs", ARM_PCS_APCS},
4802 #endif
4803 {NULL, ARM_PCS_UNKNOWN}
4806 static enum arm_pcs
4807 arm_pcs_from_attribute (tree attr)
4809 const struct pcs_attribute_arg *ptr;
4810 const char *arg;
4812 /* Get the value of the argument. */
4813 if (TREE_VALUE (attr) == NULL_TREE
4814 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4815 return ARM_PCS_UNKNOWN;
4817 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4819 /* Check it against the list of known arguments. */
4820 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4821 if (streq (arg, ptr->arg))
4822 return ptr->value;
4824 /* An unrecognized PCS variant. */
4825 return ARM_PCS_UNKNOWN;
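/* For reference, the attribute parsed above is written as, e.g.
   (illustrative declaration):
       double f (double) __attribute__((pcs("aapcs-vfp")));
   which requests the VFP variant of the AAPCS for calls to f.  */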
4828 /* Get the PCS variant to use for this call. TYPE is the function's type
4829 specification, DECL is the specific declaration. DECL may be null if
4830 the call could be indirect or if this is a library call. */
4831 static enum arm_pcs
4832 arm_get_pcs_model (const_tree type, const_tree decl)
4834 bool user_convention = false;
4835 enum arm_pcs user_pcs = arm_pcs_default;
4836 tree attr;
4838 gcc_assert (type);
4840 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4841 if (attr)
4843 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4844 user_convention = true;
4847 if (TARGET_AAPCS_BASED)
4849 /* Detect varargs functions. These always use the base rules
4850 (no argument is ever a candidate for a co-processor
4851 register). */
4852 bool base_rules = stdarg_p (type);
4854 if (user_convention)
4856 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4857 sorry ("non-AAPCS derived PCS variant");
4858 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4859 error ("variadic functions must use the base AAPCS variant");
4862 if (base_rules)
4863 return ARM_PCS_AAPCS;
4864 else if (user_convention)
4865 return user_pcs;
4866 else if (decl && flag_unit_at_a_time)
4868 /* Local functions never leak outside this compilation unit,
4869 so we are free to use whatever conventions are
4870 appropriate. */
4871 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4872 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4873 if (i && i->local)
4874 return ARM_PCS_AAPCS_LOCAL;
4877 else if (user_convention && user_pcs != arm_pcs_default)
4878 sorry ("PCS variant");
4880 /* For everything else we use the target's default. */
4881 return arm_pcs_default;
4885 static void
4886 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4887 const_tree fntype ATTRIBUTE_UNUSED,
4888 rtx libcall ATTRIBUTE_UNUSED,
4889 const_tree fndecl ATTRIBUTE_UNUSED)
4891 /* Record the unallocated VFP registers. */
4892 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4893 pcum->aapcs_vfp_reg_alloc = 0;
4896 /* Walk down the type tree of TYPE counting consecutive base elements.
4897 If *MODEP is VOIDmode, then set it to the first valid floating point
4898 type. If a non-floating point type is found, or if a floating point
4899 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4900 otherwise return the count in the sub-tree. */
4901 static int
4902 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4904 enum machine_mode mode;
4905 HOST_WIDE_INT size;
4907 switch (TREE_CODE (type))
4909 case REAL_TYPE:
4910 mode = TYPE_MODE (type);
4911 if (mode != DFmode && mode != SFmode)
4912 return -1;
4914 if (*modep == VOIDmode)
4915 *modep = mode;
4917 if (*modep == mode)
4918 return 1;
4920 break;
4922 case COMPLEX_TYPE:
4923 mode = TYPE_MODE (TREE_TYPE (type));
4924 if (mode != DFmode && mode != SFmode)
4925 return -1;
4927 if (*modep == VOIDmode)
4928 *modep = mode;
4930 if (*modep == mode)
4931 return 2;
4933 break;
4935 case VECTOR_TYPE:
4936 /* Use V2SImode and V4SImode as representatives of all 64-bit
4937 and 128-bit vector types, whether or not those modes are
4938 supported with the present options. */
4939 size = int_size_in_bytes (type);
4940 switch (size)
4942 case 8:
4943 mode = V2SImode;
4944 break;
4945 case 16:
4946 mode = V4SImode;
4947 break;
4948 default:
4949 return -1;
4952 if (*modep == VOIDmode)
4953 *modep = mode;
4955 /* Vector modes are considered to be opaque: two vectors are
4956 equivalent for the purposes of being homogeneous aggregates
4957 if they are the same size. */
4958 if (*modep == mode)
4959 return 1;
4961 break;
4963 case ARRAY_TYPE:
4965 int count;
4966 tree index = TYPE_DOMAIN (type);
4968 /* Can't handle incomplete types. */
4969 if (!COMPLETE_TYPE_P (type))
4970 return -1;
4972 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4973 if (count == -1
4974 || !index
4975 || !TYPE_MAX_VALUE (index)
4976 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
4977 || !TYPE_MIN_VALUE (index)
4978 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
4979 || count < 0)
4980 return -1;
4982 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
4983 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
4985 /* There must be no padding. */
4986 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
4987 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
4988 != count * GET_MODE_BITSIZE (*modep)))
4989 return -1;
4991 return count;
4994 case RECORD_TYPE:
4996 int count = 0;
4997 int sub_count;
4998 tree field;
5000 /* Can't handle incomplete types. */
5001 if (!COMPLETE_TYPE_P (type))
5002 return -1;
5004 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5006 if (TREE_CODE (field) != FIELD_DECL)
5007 continue;
5009 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5010 if (sub_count < 0)
5011 return -1;
5012 count += sub_count;
5015 /* There must be no padding. */
5016 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5017 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5018 != count * GET_MODE_BITSIZE (*modep)))
5019 return -1;
5021 return count;
5024 case UNION_TYPE:
5025 case QUAL_UNION_TYPE:
5027 /* These aren't very interesting except in a degenerate case. */
5028 int count = 0;
5029 int sub_count;
5030 tree field;
5032 /* Can't handle incomplete types. */
5033 if (!COMPLETE_TYPE_P (type))
5034 return -1;
5036 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5038 if (TREE_CODE (field) != FIELD_DECL)
5039 continue;
5041 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5042 if (sub_count < 0)
5043 return -1;
5044 count = count > sub_count ? count : sub_count;
5047 /* There must be no padding. */
5048 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5049 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5050 != count * GET_MODE_BITSIZE (*modep)))
5051 return -1;
5053 return count;
5056 default:
5057 break;
5060 return -1;
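/* Illustrative results: struct { float x, y, z, w; } yields a count
   of 4 with *MODEP = SFmode (a homogeneous aggregate candidate);
   double _Complex yields 2 with *MODEP = DFmode; a struct mixing
   float and double members yields -1 because the element modes do
   not match.  */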
5063 /* Return true if PCS_VARIANT should use VFP registers. */
5064 static bool
5065 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5067 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5069 static bool seen_thumb1_vfp = false;
5071 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5073 sorry ("Thumb-1 hard-float VFP ABI");
5074 /* sorry() is not immediately fatal, so only display this once. */
5075 seen_thumb1_vfp = true;
5078 return true;
5081 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5082 return false;
5084 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5085 (TARGET_VFP_DOUBLE || !is_double));
5088 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5089 suitable for passing or returning in VFP registers for the PCS
5090 variant selected. If it is, then *BASE_MODE is updated to contain
5091 a machine mode describing each element of the argument's type and
5092 *COUNT to hold the number of such elements. */
5093 static bool
5094 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5095 enum machine_mode mode, const_tree type,
5096 enum machine_mode *base_mode, int *count)
5098 enum machine_mode new_mode = VOIDmode;
5100 /* If we have the type information, prefer that to working things
5101 out from the mode. */
5102 if (type)
5104 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5106 if (ag_count > 0 && ag_count <= 4)
5107 *count = ag_count;
5108 else
5109 return false;
5111 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5112 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5113 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5115 *count = 1;
5116 new_mode = mode;
5118 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5120 *count = 2;
5121 new_mode = (mode == DCmode ? DFmode : SFmode);
5123 else
5124 return false;
5127 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5128 return false;
5130 *base_mode = new_mode;
5131 return true;
5134 static bool
5135 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5136 enum machine_mode mode, const_tree type)
5138 int count ATTRIBUTE_UNUSED;
5139 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5141 if (!use_vfp_abi (pcs_variant, false))
5142 return false;
5143 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5144 &ag_mode, &count);
5147 static bool
5148 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5149 const_tree type)
5151 if (!use_vfp_abi (pcum->pcs_variant, false))
5152 return false;
5154 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5155 &pcum->aapcs_vfp_rmode,
5156 &pcum->aapcs_vfp_rcount);
5159 static bool
5160 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5161 const_tree type ATTRIBUTE_UNUSED)
5163 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5164 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5165 int regno;
5167 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5168 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5170 pcum->aapcs_vfp_reg_alloc = mask << regno;
5171 if (mode == BLKmode
5172 || (mode == TImode && ! TARGET_NEON)
5173 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5175 int i;
5176 int rcount = pcum->aapcs_vfp_rcount;
5177 int rshift = shift;
5178 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5179 rtx par;
5180 if (!TARGET_NEON)
5182 /* Avoid using unsupported vector modes. */
5183 if (rmode == V2SImode)
5184 rmode = DImode;
5185 else if (rmode == V4SImode)
5187 rmode = DImode;
5188 rcount *= 2;
5189 rshift /= 2;
5192 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5193 for (i = 0; i < rcount; i++)
5195 rtx tmp = gen_rtx_REG (rmode,
5196 FIRST_VFP_REGNUM + regno + i * rshift);
5197 tmp = gen_rtx_EXPR_LIST
5198 (VOIDmode, tmp,
5199 GEN_INT (i * GET_MODE_SIZE (rmode)));
5200 XVECEXP (par, 0, i) = tmp;
5203 pcum->aapcs_reg = par;
5205 else
5206 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5207 return true;
5209 return false;
5212 static rtx
5213 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5214 enum machine_mode mode,
5215 const_tree type ATTRIBUTE_UNUSED)
5217 if (!use_vfp_abi (pcs_variant, false))
5218 return NULL;
5220 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5222 int count;
5223 enum machine_mode ag_mode;
5224 int i;
5225 rtx par;
5226 int shift;
5228 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5229 &ag_mode, &count);
5231 if (!TARGET_NEON)
5233 if (ag_mode == V2SImode)
5234 ag_mode = DImode;
5235 else if (ag_mode == V4SImode)
5237 ag_mode = DImode;
5238 count *= 2;
5241 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5242 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5243 for (i = 0; i < count; i++)
5245 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5246 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5247 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5248 XVECEXP (par, 0, i) = tmp;
5251 return par;
5254 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5257 static void
5258 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5259 enum machine_mode mode ATTRIBUTE_UNUSED,
5260 const_tree type ATTRIBUTE_UNUSED)
5262 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5263 pcum->aapcs_vfp_reg_alloc = 0;
5264 return;
5267 #define AAPCS_CP(X) \
5269 aapcs_ ## X ## _cum_init, \
5270 aapcs_ ## X ## _is_call_candidate, \
5271 aapcs_ ## X ## _allocate, \
5272 aapcs_ ## X ## _is_return_candidate, \
5273 aapcs_ ## X ## _allocate_return_reg, \
5274 aapcs_ ## X ## _advance \
5277 /* Table of co-processors that can be used to pass arguments in
5278 registers. Ideally no argument should be a candidate for more than
5279 one co-processor table entry, but the table is processed in order
5280 and stops after the first match. If that entry then fails to put
5281 the argument into a co-processor register, the argument will go on
5282 the stack. */
5283 static struct
5285 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5286 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5288 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5289 BLKmode) is a candidate for this co-processor's registers; this
5290 function should ignore any position-dependent state in
5291 CUMULATIVE_ARGS and only use call-type dependent information. */
5292 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5294 /* Return true if the argument does get a co-processor register; it
5295 should set aapcs_reg to an RTX of the register allocated as is
5296 required for a return from FUNCTION_ARG. */
5297 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5299 /* Return true if a result of mode MODE (or type TYPE if MODE is
5300 BLKmode) can be returned in this co-processor's registers. */
5301 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5303 /* Allocate and return an RTX element to hold the return type of a
5304 call; this routine must not fail and will only be called if
5305 is_return_candidate returned true with the same parameters. */
5306 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5308 /* Finish processing this argument and prepare to start processing
5309 the next one. */
5310 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5311 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5313 AAPCS_CP(vfp)
5316 #undef AAPCS_CP
5318 static int
5319 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5320 const_tree type)
5322 int i;
5324 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5325 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5326 return i;
5328 return -1;
5331 static int
5332 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5334 /* We aren't passed a decl, so we can't check that a call is local.
5335 However, it isn't clear that that would be a win anyway, since it
5336 might limit some tail-calling opportunities. */
5337 enum arm_pcs pcs_variant;
5339 if (fntype)
5341 const_tree fndecl = NULL_TREE;
5343 if (TREE_CODE (fntype) == FUNCTION_DECL)
5345 fndecl = fntype;
5346 fntype = TREE_TYPE (fntype);
5349 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5351 else
5352 pcs_variant = arm_pcs_default;
5354 if (pcs_variant != ARM_PCS_AAPCS)
5356 int i;
5358 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5359 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5360 TYPE_MODE (type),
5361 type))
5362 return i;
5364 return -1;
5367 static rtx
5368 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5369 const_tree fntype)
5371 /* We aren't passed a decl, so we can't check that a call is local.
5372 However, it isn't clear that that would be a win anyway, since it
5373 might limit some tail-calling opportunities. */
5374 enum arm_pcs pcs_variant;
5375 int unsignedp ATTRIBUTE_UNUSED;
5377 if (fntype)
5379 const_tree fndecl = NULL_TREE;
5381 if (TREE_CODE (fntype) == FUNCTION_DECL)
5383 fndecl = fntype;
5384 fntype = TREE_TYPE (fntype);
5387 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5389 else
5390 pcs_variant = arm_pcs_default;
5392 /* Promote integer types. */
5393 if (type && INTEGRAL_TYPE_P (type))
5394 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5396 if (pcs_variant != ARM_PCS_AAPCS)
5398 int i;
5400 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5401 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5402 type))
5403 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5404 mode, type);
5407 /* Promotes small structs returned in a register to full-word size
5408 for big-endian AAPCS. */
5409 if (type && arm_return_in_msb (type))
5411 HOST_WIDE_INT size = int_size_in_bytes (type);
5412 if (size % UNITS_PER_WORD != 0)
5414 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5415 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5419 return gen_rtx_REG (mode, R0_REGNUM);
5422 static rtx
5423 aapcs_libcall_value (enum machine_mode mode)
5425 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5426 && GET_MODE_SIZE (mode) <= 4)
5427 mode = SImode;
5429 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5432 /* Lay out a function argument using the AAPCS rules. The rule
5433 numbers referred to here are those in the AAPCS. */
5434 static void
5435 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5436 const_tree type, bool named)
5438 int nregs, nregs2;
5439 int ncrn;
5441 /* We only need to do this once per argument. */
5442 if (pcum->aapcs_arg_processed)
5443 return;
5445 pcum->aapcs_arg_processed = true;
5447 /* Special case: if named is false then we are handling an incoming
5448 anonymous argument which is on the stack. */
5449 if (!named)
5450 return;
5452 /* Is this a potential co-processor register candidate? */
5453 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5455 int slot = aapcs_select_call_coproc (pcum, mode, type);
5456 pcum->aapcs_cprc_slot = slot;
5458 /* We don't have to apply any of the rules from part B of the
5459 preparation phase, these are handled elsewhere in the
5460 compiler. */
5462 if (slot >= 0)
5464 /* A Co-processor register candidate goes either in its own
5465 class of registers or on the stack. */
5466 if (!pcum->aapcs_cprc_failed[slot])
5468 /* C1.cp - Try to allocate the argument to co-processor
5469 registers. */
5470 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5471 return;
5473 /* C2.cp - Put the argument on the stack and note that we
5474 can't assign any more candidates in this slot. We also
5475 need to note that we have allocated stack space, so that
5476 we won't later try to split a non-cprc candidate between
5477 core registers and the stack. */
5478 pcum->aapcs_cprc_failed[slot] = true;
5479 pcum->can_split = false;
5482 /* We didn't get a register, so this argument goes on the
5483 stack. */
5484 gcc_assert (pcum->can_split == false);
5485 return;
5489 /* C3 - For double-word aligned arguments, round the NCRN up to the
5490 next even number. */
5491 ncrn = pcum->aapcs_ncrn;
5492 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5493 ncrn++;
5495 nregs = ARM_NUM_REGS2(mode, type);
5497 /* Sigh, this test should really assert that nregs > 0, but a GCC
5498 extension allows empty structs and then gives them empty size; it
5499 then allows such a structure to be passed by value. For some of
5500 the code below we have to pretend that such an argument has
5501 non-zero size so that we 'locate' it correctly either in
5502 registers or on the stack. */
5503 gcc_assert (nregs >= 0);
5505 nregs2 = nregs ? nregs : 1;
5507 /* C4 - Argument fits entirely in core registers. */
5508 if (ncrn + nregs2 <= NUM_ARG_REGS)
5510 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5511 pcum->aapcs_next_ncrn = ncrn + nregs;
5512 return;
5515 /* C5 - Some core registers left and there are no arguments already
5516 on the stack: split this argument between the remaining core
5517 registers and the stack. */
5518 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5520 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5521 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5522 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5523 return;
5526 /* C6 - NCRN is set to 4. */
5527 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5529 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5530 return;
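/* Illustrative sketch of rules C.3-C.6 above (not part of the compiler):
   given the next core register number NCRN and an argument's size in
   bytes, compute how many of r0-r3 and how many stack bytes it occupies.
   Assumes NUM_ARG_REGS == 4 and UNITS_PER_WORD == 4 as on ARM, and
   ignores the co-processor and can_split handling done above.  */
#if 0
static void
example_aapcs_core_layout (int ncrn, int size_in_bytes, int dw_aligned,
			   int *reg_words, int *stack_bytes)
{
  int nregs = (size_in_bytes + 3) / 4;	/* Words needed by the argument.  */

  if (dw_aligned && (ncrn & 1))		/* C.3: round NCRN up to even.  */
    ncrn++;

  if (ncrn + nregs <= 4)		/* C.4: fits entirely in r0-r3.  */
    {
      *reg_words = nregs;
      *stack_bytes = 0;
    }
  else if (ncrn < 4)			/* C.5: split regs/stack.  */
    {
      *reg_words = 4 - ncrn;
      *stack_bytes = (nregs - *reg_words) * 4;
    }
  else					/* C.6-C.8: all on the stack.  */
    {
      *reg_words = 0;
      *stack_bytes = nregs * 4;
    }
}
#endif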
5533 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5534 for a call to a function whose data type is FNTYPE.
5535 For a library call, FNTYPE is NULL. */
5536 void
5537 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5538 rtx libname,
5539 tree fndecl ATTRIBUTE_UNUSED)
5541 /* Long call handling. */
5542 if (fntype)
5543 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5544 else
5545 pcum->pcs_variant = arm_pcs_default;
5547 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5549 if (arm_libcall_uses_aapcs_base (libname))
5550 pcum->pcs_variant = ARM_PCS_AAPCS;
5552 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5553 pcum->aapcs_reg = NULL_RTX;
5554 pcum->aapcs_partial = 0;
5555 pcum->aapcs_arg_processed = false;
5556 pcum->aapcs_cprc_slot = -1;
5557 pcum->can_split = true;
5559 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5561 int i;
5563 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5565 pcum->aapcs_cprc_failed[i] = false;
5566 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5569 return;
5572 /* Legacy ABIs */
5574 /* On the ARM, the offset starts at 0. */
5575 pcum->nregs = 0;
5576 pcum->iwmmxt_nregs = 0;
5577 pcum->can_split = true;
5579 /* Varargs vectors are treated the same as long long.
5580 named_count avoids having to change the way arm handles 'named'. */
5581 pcum->named_count = 0;
5582 pcum->nargs = 0;
5584 if (TARGET_REALLY_IWMMXT && fntype)
5586 tree fn_arg;
5588 for (fn_arg = TYPE_ARG_TYPES (fntype);
5589 fn_arg;
5590 fn_arg = TREE_CHAIN (fn_arg))
5591 pcum->named_count += 1;
5593 if (! pcum->named_count)
5594 pcum->named_count = INT_MAX;
5598 /* Return true if we use LRA instead of reload pass. */
5599 static bool
5600 arm_lra_p (void)
5602 return arm_lra_flag;
5605 /* Return true if mode/type need doubleword alignment. */
5606 static bool
5607 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5609 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5610 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
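/* For illustration (assuming the usual AAPCS value PARM_BOUNDARY == 32):
   64-bit-aligned types such as 'long long' and 'double' answer true here
   and therefore start in an even-numbered register pair or at an 8-byte
   aligned stack slot.  */
#if 0
void example_fn (int a, long long b);	/* a goes in r0; b is doubleword
					   aligned, so it takes r2/r3 and
					   r1 is left unused.  */
#endif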
5614 /* Determine where to put an argument to a function.
5615 Value is zero to push the argument on the stack,
5616 or a hard register in which to store the argument.
5618 MODE is the argument's machine mode.
5619 TYPE is the data type of the argument (as a tree).
5620 This is null for libcalls where that information may
5621 not be available.
5622 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5623 the preceding args and about the function being called.
5624 NAMED is nonzero if this argument is a named parameter
5625 (otherwise it is an extra parameter matching an ellipsis).
5627 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5628 other arguments are passed on the stack. If (NAMED == 0) (which happens
5629 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5630 defined), say it is passed on the stack (function_prologue will
5631 indeed make it be passed on the stack if necessary). */
5633 static rtx
5634 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5635 const_tree type, bool named)
5637 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5638 int nregs;
5640 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5641 a call insn (op3 of a call_value insn). */
5642 if (mode == VOIDmode)
5643 return const0_rtx;
5645 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5647 aapcs_layout_arg (pcum, mode, type, named);
5648 return pcum->aapcs_reg;
5651 /* Varargs vectors are treated the same as long long.
5652 named_count avoids having to change the way arm handles 'named'. */
5653 if (TARGET_IWMMXT_ABI
5654 && arm_vector_mode_supported_p (mode)
5655 && pcum->named_count > pcum->nargs + 1)
5657 if (pcum->iwmmxt_nregs <= 9)
5658 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5659 else
5661 pcum->can_split = false;
5662 return NULL_RTX;
5666 /* Put doubleword aligned quantities in even register pairs. */
5667 if (pcum->nregs & 1
5668 && ARM_DOUBLEWORD_ALIGN
5669 && arm_needs_doubleword_align (mode, type))
5670 pcum->nregs++;
5672 /* Only allow splitting an arg between regs and memory if all preceding
5673 args were allocated to regs. For args passed by reference we only count
5674 the reference pointer. */
5675 if (pcum->can_split)
5676 nregs = 1;
5677 else
5678 nregs = ARM_NUM_REGS2 (mode, type);
5680 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5681 return NULL_RTX;
5683 return gen_rtx_REG (mode, pcum->nregs);
5686 static unsigned int
5687 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5689 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5690 ? DOUBLEWORD_ALIGNMENT
5691 : PARM_BOUNDARY);
5694 static int
5695 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5696 tree type, bool named)
5698 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5699 int nregs = pcum->nregs;
5701 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5703 aapcs_layout_arg (pcum, mode, type, named);
5704 return pcum->aapcs_partial;
5707 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5708 return 0;
5710 if (NUM_ARG_REGS > nregs
5711 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5712 && pcum->can_split)
5713 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5715 return 0;
5718 /* Update the data in PCUM to advance over an argument
5719 of mode MODE and data type TYPE.
5720 (TYPE is null for libcalls where that information may not be available.) */
5722 static void
5723 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5724 const_tree type, bool named)
5726 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5728 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5730 aapcs_layout_arg (pcum, mode, type, named);
5732 if (pcum->aapcs_cprc_slot >= 0)
5734 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5735 type);
5736 pcum->aapcs_cprc_slot = -1;
5739 /* Generic stuff. */
5740 pcum->aapcs_arg_processed = false;
5741 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5742 pcum->aapcs_reg = NULL_RTX;
5743 pcum->aapcs_partial = 0;
5745 else
5747 pcum->nargs += 1;
5748 if (arm_vector_mode_supported_p (mode)
5749 && pcum->named_count > pcum->nargs
5750 && TARGET_IWMMXT_ABI)
5751 pcum->iwmmxt_nregs += 1;
5752 else
5753 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5757 /* Variable sized types are passed by reference. This is a GCC
5758 extension to the ARM ABI. */
5760 static bool
5761 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5762 enum machine_mode mode ATTRIBUTE_UNUSED,
5763 const_tree type, bool named ATTRIBUTE_UNUSED)
5765 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5768 /* Encode the current state of the #pragma [no_]long_calls. */
5769 typedef enum
5771 OFF, /* No #pragma [no_]long_calls is in effect. */
5772 LONG, /* #pragma long_calls is in effect. */
5773 SHORT /* #pragma no_long_calls is in effect. */
5774 } arm_pragma_enum;
5776 static arm_pragma_enum arm_pragma_long_calls = OFF;
5778 void
5779 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5781 arm_pragma_long_calls = LONG;
5784 void
5785 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5787 arm_pragma_long_calls = SHORT;
5790 void
5791 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5793 arm_pragma_long_calls = OFF;
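/* For illustration: how the pragmas handled above appear in user code
   (see the GCC manual's ARM pragma documentation).  */
#if 0
#pragma long_calls
void far_away_fn (void);	/* Subsequent functions get "long_call".  */
#pragma no_long_calls
void nearby_fn (void);		/* Subsequent functions get "short_call".  */
#pragma long_calls_off
void ordinary_fn (void);	/* Back to the command-line default.  */
#endif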
5796 /* Handle an attribute requiring a FUNCTION_DECL;
5797 arguments as in struct attribute_spec.handler. */
5798 static tree
5799 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5800 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5802 if (TREE_CODE (*node) != FUNCTION_DECL)
5804 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5805 name);
5806 *no_add_attrs = true;
5809 return NULL_TREE;
5812 /* Handle an "interrupt" or "isr" attribute;
5813 arguments as in struct attribute_spec.handler. */
5814 static tree
5815 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5816 bool *no_add_attrs)
5818 if (DECL_P (*node))
5820 if (TREE_CODE (*node) != FUNCTION_DECL)
5822 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5823 name);
5824 *no_add_attrs = true;
5826 /* FIXME: the argument, if any, is checked for type attributes;
5827 should it be checked for decl ones? */
5829 else
5831 if (TREE_CODE (*node) == FUNCTION_TYPE
5832 || TREE_CODE (*node) == METHOD_TYPE)
5834 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5836 warning (OPT_Wattributes, "%qE attribute ignored",
5837 name);
5838 *no_add_attrs = true;
5841 else if (TREE_CODE (*node) == POINTER_TYPE
5842 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5843 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5844 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5846 *node = build_variant_type_copy (*node);
5847 TREE_TYPE (*node) = build_type_attribute_variant
5848 (TREE_TYPE (*node),
5849 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5850 *no_add_attrs = true;
5852 else
5854 /* Possibly pass this attribute on from the type to a decl. */
5855 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5856 | (int) ATTR_FLAG_FUNCTION_NEXT
5857 | (int) ATTR_FLAG_ARRAY_NEXT))
5859 *no_add_attrs = true;
5860 return tree_cons (name, args, NULL_TREE);
5862 else
5864 warning (OPT_Wattributes, "%qE attribute ignored",
5865 name);
5870 return NULL_TREE;
5873 /* Handle a "pcs" attribute; arguments as in struct
5874 attribute_spec.handler. */
5875 static tree
5876 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5877 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5879 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5881 warning (OPT_Wattributes, "%qE attribute ignored", name);
5882 *no_add_attrs = true;
5884 return NULL_TREE;
5887 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5888 /* Handle the "notshared" attribute. This attribute is another way of
5889 requesting hidden visibility. ARM's compiler supports
5890 "__declspec(notshared)"; we support the same thing via an
5891 attribute. */
5893 static tree
5894 arm_handle_notshared_attribute (tree *node,
5895 tree name ATTRIBUTE_UNUSED,
5896 tree args ATTRIBUTE_UNUSED,
5897 int flags ATTRIBUTE_UNUSED,
5898 bool *no_add_attrs)
5900 tree decl = TYPE_NAME (*node);
5902 if (decl)
5904 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5905 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5906 *no_add_attrs = false;
5908 return NULL_TREE;
5910 #endif
5912 /* Return 0 if the attributes for two types are incompatible, 1 if they
5913 are compatible, and 2 if they are nearly compatible (which causes a
5914 warning to be generated). */
5915 static int
5916 arm_comp_type_attributes (const_tree type1, const_tree type2)
5918 int l1, l2, s1, s2;
5920 /* Check for mismatch of non-default calling convention. */
5921 if (TREE_CODE (type1) != FUNCTION_TYPE)
5922 return 1;
5924 /* Check for mismatched call attributes. */
5925 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5926 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5927 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5928 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5930 /* Only bother to check if an attribute is defined. */
5931 if (l1 | l2 | s1 | s2)
5933 /* If one type has an attribute, the other must have the same attribute. */
5934 if ((l1 != l2) || (s1 != s2))
5935 return 0;
5937 /* Disallow mixed attributes. */
5938 if ((l1 & s2) || (l2 & s1))
5939 return 0;
5942 /* Check for mismatched ISR attribute. */
5943 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5944 if (! l1)
5945 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5946 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5947 if (! l2)
5948 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5949 if (l1 != l2)
5950 return 0;
5952 return 1;
5955 /* Assign default attributes to a newly defined type. This is used to
5956 set short_call/long_call attributes for function types of
5957 functions defined inside corresponding #pragma scopes. */
5958 static void
5959 arm_set_default_type_attributes (tree type)
5961 /* Add __attribute__ ((long_call)) to all functions when inside
5962 #pragma long_calls, or __attribute__ ((short_call)) when inside
5963 #pragma no_long_calls. */
5964 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5966 tree type_attr_list, attr_name;
5967 type_attr_list = TYPE_ATTRIBUTES (type);
5969 if (arm_pragma_long_calls == LONG)
5970 attr_name = get_identifier ("long_call");
5971 else if (arm_pragma_long_calls == SHORT)
5972 attr_name = get_identifier ("short_call");
5973 else
5974 return;
5976 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5977 TYPE_ATTRIBUTES (type) = type_attr_list;
5981 /* Return true if DECL is known to be linked into section SECTION. */
5983 static bool
5984 arm_function_in_section_p (tree decl, section *section)
5986 /* We can only be certain about functions defined in the same
5987 compilation unit. */
5988 if (!TREE_STATIC (decl))
5989 return false;
5991 /* Make sure that SYMBOL always binds to the definition in this
5992 compilation unit. */
5993 if (!targetm.binds_local_p (decl))
5994 return false;
5996 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5997 if (!DECL_SECTION_NAME (decl))
5999 /* Make sure that we will not create a unique section for DECL. */
6000 if (flag_function_sections || DECL_ONE_ONLY (decl))
6001 return false;
6004 return function_section (decl) == section;
6007 /* Return nonzero if a 32-bit "long_call" should be generated for
6008 a call from the current function to DECL. We generate a long_call
6009 if the function:
6011 a. has an __attribute__((long_call))
6012 or b. is within the scope of a #pragma long_calls
6013 or c. the -mlong-calls command line switch has been specified
6015 However we do not generate a long call if the function:
6017 d. has an __attribute__ ((short_call))
6018 or e. is inside the scope of a #pragma no_long_calls
6019 or f. is defined in the same section as the current function. */
6021 bool
6022 arm_is_long_call_p (tree decl)
6024 tree attrs;
6026 if (!decl)
6027 return TARGET_LONG_CALLS;
6029 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6030 if (lookup_attribute ("short_call", attrs))
6031 return false;
6033 /* For "f", be conservative, and only cater for cases in which the
6034 whole of the current function is placed in the same section. */
6035 if (!flag_reorder_blocks_and_partition
6036 && TREE_CODE (decl) == FUNCTION_DECL
6037 && arm_function_in_section_p (decl, current_function_section ()))
6038 return false;
6040 if (lookup_attribute ("long_call", attrs))
6041 return true;
6043 return TARGET_LONG_CALLS;
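/* For illustration: the attributes tested above, as written in user
   code.  */
#if 0
void remote_fn (void) __attribute__ ((long_call));  /* Rule a: long call,
							unless rule f applies.  */
void close_fn (void) __attribute__ ((short_call));  /* Rule d: never a
							long call.  */
#endif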
6046 /* Return nonzero if it is ok to make a tail-call to DECL. */
6047 static bool
6048 arm_function_ok_for_sibcall (tree decl, tree exp)
6050 unsigned long func_type;
6052 if (cfun->machine->sibcall_blocked)
6053 return false;
6055 /* Never tailcall something if we are generating code for Thumb-1. */
6056 if (TARGET_THUMB1)
6057 return false;
6059 /* The PIC register is live on entry to VxWorks PLT entries, so we
6060 must make the call before restoring the PIC register. */
6061 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6062 return false;
6064 /* Cannot tail-call to long calls, since these are out of range of
6065 a branch instruction. */
6066 if (decl && arm_is_long_call_p (decl))
6067 return false;
6069 /* If we are interworking and the function is not declared static
6070 then we can't tail-call it unless we know that it exists in this
6071 compilation unit (since it might be a Thumb routine). */
6072 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6073 && !TREE_ASM_WRITTEN (decl))
6074 return false;
6076 func_type = arm_current_func_type ();
6077 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6078 if (IS_INTERRUPT (func_type))
6079 return false;
6081 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6083 /* Check that the return value locations are the same. For
6084 example that we aren't returning a value from the sibling in
6085 a VFP register but then need to transfer it to a core
6086 register. */
6087 rtx a, b;
6089 a = arm_function_value (TREE_TYPE (exp), decl, false);
6090 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6091 cfun->decl, false);
6092 if (!rtx_equal_p (a, b))
6093 return false;
6096 /* Never tailcall if function may be called with a misaligned SP. */
6097 if (IS_STACKALIGN (func_type))
6098 return false;
6100 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6101 references should become a NOP. Don't convert such calls into
6102 sibling calls. */
6103 if (TARGET_AAPCS_BASED
6104 && arm_abi == ARM_ABI_AAPCS
6105 && decl
6106 && DECL_WEAK (decl))
6107 return false;
6109 /* Everything else is ok. */
6110 return true;
6114 /* Addressing mode support functions. */
6116 /* Return nonzero if X is a legitimate immediate operand when compiling
6117 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6119 legitimate_pic_operand_p (rtx x)
6121 if (GET_CODE (x) == SYMBOL_REF
6122 || (GET_CODE (x) == CONST
6123 && GET_CODE (XEXP (x, 0)) == PLUS
6124 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6125 return 0;
6127 return 1;
6130 /* Record that the current function needs a PIC register. Initialize
6131 cfun->machine->pic_reg if we have not already done so. */
6133 static void
6134 require_pic_register (void)
6136 /* A lot of the logic here is made obscure by the fact that this
6137 routine gets called as part of the rtx cost estimation process.
6138 We don't want those calls to affect any assumptions about the real
6139 function; and further, we can't call entry_of_function() until we
6140 start the real expansion process. */
6141 if (!crtl->uses_pic_offset_table)
6143 gcc_assert (can_create_pseudo_p ());
6144 if (arm_pic_register != INVALID_REGNUM
6145 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6147 if (!cfun->machine->pic_reg)
6148 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6150 /* Play games to avoid marking the function as needing pic
6151 if we are being called as part of the cost-estimation
6152 process. */
6153 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6154 crtl->uses_pic_offset_table = 1;
6156 else
6158 rtx seq, insn;
6160 if (!cfun->machine->pic_reg)
6161 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6163 /* Play games to avoid marking the function as needing pic
6164 if we are being called as part of the cost-estimation
6165 process. */
6166 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6168 crtl->uses_pic_offset_table = 1;
6169 start_sequence ();
6171 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6172 && arm_pic_register > LAST_LO_REGNUM)
6173 emit_move_insn (cfun->machine->pic_reg,
6174 gen_rtx_REG (Pmode, arm_pic_register));
6175 else
6176 arm_load_pic_register (0UL);
6178 seq = get_insns ();
6179 end_sequence ();
6181 for (insn = seq; insn; insn = NEXT_INSN (insn))
6182 if (INSN_P (insn))
6183 INSN_LOCATION (insn) = prologue_location;
6185 /* We can be called during expansion of PHI nodes, where
6186 we can't yet emit instructions directly in the final
6187 insn stream. Queue the insns on the entry edge, they will
6188 be committed after everything else is expanded. */
6189 insert_insn_on_edge (seq,
6190 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6197 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6199 if (GET_CODE (orig) == SYMBOL_REF
6200 || GET_CODE (orig) == LABEL_REF)
6202 rtx insn;
6204 if (reg == 0)
6206 gcc_assert (can_create_pseudo_p ());
6207 reg = gen_reg_rtx (Pmode);
6210 /* VxWorks does not impose a fixed gap between segments; the run-time
6211 gap can be different from the object-file gap. We therefore can't
6212 use GOTOFF unless we are absolutely sure that the symbol is in the
6213 same segment as the GOT. Unfortunately, the flexibility of linker
6214 scripts means that we can't be sure of that in general, so assume
6215 that GOTOFF is never valid on VxWorks. */
6216 if ((GET_CODE (orig) == LABEL_REF
6217 || (GET_CODE (orig) == SYMBOL_REF &&
6218 SYMBOL_REF_LOCAL_P (orig)))
6219 && NEED_GOT_RELOC
6220 && arm_pic_data_is_text_relative)
6221 insn = arm_pic_static_addr (orig, reg);
6222 else
6224 rtx pat;
6225 rtx mem;
6227 /* If this function doesn't have a pic register, create one now. */
6228 require_pic_register ();
6230 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6232 /* Make the MEM as close to a constant as possible. */
6233 mem = SET_SRC (pat);
6234 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6235 MEM_READONLY_P (mem) = 1;
6236 MEM_NOTRAP_P (mem) = 1;
6238 insn = emit_insn (pat);
6241 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6242 by loop. */
6243 set_unique_reg_note (insn, REG_EQUAL, orig);
6245 return reg;
6247 else if (GET_CODE (orig) == CONST)
6249 rtx base, offset;
6251 if (GET_CODE (XEXP (orig, 0)) == PLUS
6252 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6253 return orig;
6255 /* Handle the case where we have: const (UNSPEC_TLS). */
6256 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6257 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6258 return orig;
6260 /* Handle the case where we have:
6261 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6262 CONST_INT. */
6263 if (GET_CODE (XEXP (orig, 0)) == PLUS
6264 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6265 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6267 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6268 return orig;
6271 if (reg == 0)
6273 gcc_assert (can_create_pseudo_p ());
6274 reg = gen_reg_rtx (Pmode);
6277 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6279 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6280 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6281 base == reg ? 0 : reg);
6283 if (CONST_INT_P (offset))
6285 /* The base register doesn't really matter, we only want to
6286 test the index for the appropriate mode. */
6287 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6289 gcc_assert (can_create_pseudo_p ());
6290 offset = force_reg (Pmode, offset);
6293 if (CONST_INT_P (offset))
6294 return plus_constant (Pmode, base, INTVAL (offset));
6297 if (GET_MODE_SIZE (mode) > 4
6298 && (GET_MODE_CLASS (mode) == MODE_INT
6299 || TARGET_SOFT_FLOAT))
6301 emit_insn (gen_addsi3 (reg, base, offset));
6302 return reg;
6305 return gen_rtx_PLUS (Pmode, base, offset);
6308 return orig;
6312 /* Find a spare register to use during the prolog of a function. */
6314 static int
6315 thumb_find_work_register (unsigned long pushed_regs_mask)
6317 int reg;
6319 /* Check the argument registers first as these are call-used. The
6320 register allocation order means that sometimes r3 might be used
6321 but earlier argument registers might not, so check them all. */
6322 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6323 if (!df_regs_ever_live_p (reg))
6324 return reg;
6326 /* Before going on to check the call-saved registers we can try a couple
6327 more ways of deducing that r3 is available. The first is when we are
6328 pushing anonymous arguments onto the stack and we have less than 4
6329 registers worth of fixed arguments(*). In this case r3 will be part of
6330 the variable argument list and so we can be sure that it will be
6331 pushed right at the start of the function. Hence it will be available
6332 for the rest of the prologue.
6333 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6334 if (cfun->machine->uses_anonymous_args
6335 && crtl->args.pretend_args_size > 0)
6336 return LAST_ARG_REGNUM;
6338 /* The other case is when we have fixed arguments but less than 4 registers
6339 worth. In this case r3 might be used in the body of the function, but
6340 it is not being used to convey an argument into the function. In theory
6341 we could just check crtl->args.size to see how many bytes are
6342 being passed in argument registers, but it seems that it is unreliable.
6343 Sometimes it will have the value 0 when in fact arguments are being
6344 passed. (See testcase execute/20021111-1.c for an example). So we also
6345 check the args_info.nregs field as well. The problem with this field is
6346 that it makes no allowances for arguments that are passed to the
6347 function but which are not used. Hence we could miss an opportunity
6348 when a function has an unused argument in r3. But it is better to be
6349 safe than to be sorry. */
6350 if (! cfun->machine->uses_anonymous_args
6351 && crtl->args.size >= 0
6352 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6353 && (TARGET_AAPCS_BASED
6354 ? crtl->args.info.aapcs_ncrn < 4
6355 : crtl->args.info.nregs < 4))
6356 return LAST_ARG_REGNUM;
6358 /* Otherwise look for a call-saved register that is going to be pushed. */
6359 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6360 if (pushed_regs_mask & (1 << reg))
6361 return reg;
6363 if (TARGET_THUMB2)
6365 /* Thumb-2 can use high regs. */
6366 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6367 if (pushed_regs_mask & (1 << reg))
6368 return reg;
6370 /* Something went wrong - thumb_compute_save_reg_mask()
6371 should have arranged for a suitable register to be pushed. */
6372 gcc_unreachable ();
6375 static GTY(()) int pic_labelno;
6377 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6378 low register. */
6380 void
6381 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6383 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6385 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6386 return;
6388 gcc_assert (flag_pic);
6390 pic_reg = cfun->machine->pic_reg;
6391 if (TARGET_VXWORKS_RTP)
6393 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6394 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6395 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6397 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6399 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6400 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6402 else
6404 /* We use an UNSPEC rather than a LABEL_REF because this label
6405 never appears in the code stream. */
6407 labelno = GEN_INT (pic_labelno++);
6408 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6409 l1 = gen_rtx_CONST (VOIDmode, l1);
6411 /* On the ARM the PC register contains 'dot + 8' at the time of the
6412 addition; on the Thumb it is 'dot + 4'. */
6413 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6414 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6415 UNSPEC_GOTSYM_OFF);
6416 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6418 if (TARGET_32BIT)
6420 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6422 else /* TARGET_THUMB1 */
6424 if (arm_pic_register != INVALID_REGNUM
6425 && REGNO (pic_reg) > LAST_LO_REGNUM)
6427 /* We will have pushed the pic register, so we should always be
6428 able to find a work register. */
6429 pic_tmp = gen_rtx_REG (SImode,
6430 thumb_find_work_register (saved_regs));
6431 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6432 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6433 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6435 else if (arm_pic_register != INVALID_REGNUM
6436 && arm_pic_register > LAST_LO_REGNUM
6437 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6439 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6440 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6441 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6443 else
6444 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6448 /* Need to emit this whether or not we obey regdecls,
6449 since setjmp/longjmp can cause life info to screw up. */
6450 emit_use (pic_reg);
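/* Illustrative sketch (not compiler code) of the constant built above:
   it is the distance from the point where the PC is read to the GOT, so
   adding the PC at run time yields the GOT address.  In ARM state the PC
   reads as the label address + 8, in Thumb state + 4.  */
#if 0
static long
example_pic_offset (long got_address, long label_address, int arm_state)
{
  return got_address - (label_address + (arm_state ? 8 : 4));
}
#endif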
6453 /* Generate code to load the address of a static var when flag_pic is set. */
6454 static rtx
6455 arm_pic_static_addr (rtx orig, rtx reg)
6457 rtx l1, labelno, offset_rtx, insn;
6459 gcc_assert (flag_pic);
6461 /* We use an UNSPEC rather than a LABEL_REF because this label
6462 never appears in the code stream. */
6463 labelno = GEN_INT (pic_labelno++);
6464 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6465 l1 = gen_rtx_CONST (VOIDmode, l1);
6467 /* On the ARM the PC register contains 'dot + 8' at the time of the
6468 addition; on the Thumb it is 'dot + 4'. */
6469 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6470 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6471 UNSPEC_SYMBOL_OFFSET);
6472 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6474 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6475 return insn;
6478 /* Return nonzero if X is valid as an ARM state addressing register. */
6479 static int
6480 arm_address_register_rtx_p (rtx x, int strict_p)
6482 int regno;
6484 if (!REG_P (x))
6485 return 0;
6487 regno = REGNO (x);
6489 if (strict_p)
6490 return ARM_REGNO_OK_FOR_BASE_P (regno);
6492 return (regno <= LAST_ARM_REGNUM
6493 || regno >= FIRST_PSEUDO_REGISTER
6494 || regno == FRAME_POINTER_REGNUM
6495 || regno == ARG_POINTER_REGNUM);
6498 /* Return TRUE if this rtx is the difference of a symbol and a label,
6499 and will reduce to a PC-relative relocation in the object file.
6500 Expressions like this can be left alone when generating PIC, rather
6501 than forced through the GOT. */
6502 static int
6503 pcrel_constant_p (rtx x)
6505 if (GET_CODE (x) == MINUS)
6506 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6508 return FALSE;
6511 /* Return true if X will surely end up in an index register after the
6512 next splitting pass. */
6513 static bool
6514 will_be_in_index_register (const_rtx x)
6516 /* arm.md: calculate_pic_address will split this into a register. */
6517 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6520 /* Return nonzero if X is a valid ARM state address operand. */
6522 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6523 int strict_p)
6525 bool use_ldrd;
6526 enum rtx_code code = GET_CODE (x);
6528 if (arm_address_register_rtx_p (x, strict_p))
6529 return 1;
6531 use_ldrd = (TARGET_LDRD
6532 && (mode == DImode
6533 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6535 if (code == POST_INC || code == PRE_DEC
6536 || ((code == PRE_INC || code == POST_DEC)
6537 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6538 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6540 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6541 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6542 && GET_CODE (XEXP (x, 1)) == PLUS
6543 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6545 rtx addend = XEXP (XEXP (x, 1), 1);
6547 /* Don't allow ldrd post-increment by register because it's hard
6548 to fix up invalid register choices. */
6549 if (use_ldrd
6550 && GET_CODE (x) == POST_MODIFY
6551 && REG_P (addend))
6552 return 0;
6554 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6555 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6558 /* After reload constants split into minipools will have addresses
6559 from a LABEL_REF. */
6560 else if (reload_completed
6561 && (code == LABEL_REF
6562 || (code == CONST
6563 && GET_CODE (XEXP (x, 0)) == PLUS
6564 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6565 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6566 return 1;
6568 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6569 return 0;
6571 else if (code == PLUS)
6573 rtx xop0 = XEXP (x, 0);
6574 rtx xop1 = XEXP (x, 1);
6576 return ((arm_address_register_rtx_p (xop0, strict_p)
6577 && ((CONST_INT_P (xop1)
6578 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6579 || (!strict_p && will_be_in_index_register (xop1))))
6580 || (arm_address_register_rtx_p (xop1, strict_p)
6581 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6584 #if 0
6585 /* Reload currently can't handle MINUS, so disable this for now */
6586 else if (GET_CODE (x) == MINUS)
6588 rtx xop0 = XEXP (x, 0);
6589 rtx xop1 = XEXP (x, 1);
6591 return (arm_address_register_rtx_p (xop0, strict_p)
6592 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6594 #endif
6596 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6597 && code == SYMBOL_REF
6598 && CONSTANT_POOL_ADDRESS_P (x)
6599 && ! (flag_pic
6600 && symbol_mentioned_p (get_pool_constant (x))
6601 && ! pcrel_constant_p (get_pool_constant (x))))
6602 return 1;
6604 return 0;
6607 /* Return nonzero if X is a valid Thumb-2 address operand. */
6608 static int
6609 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6611 bool use_ldrd;
6612 enum rtx_code code = GET_CODE (x);
6614 if (arm_address_register_rtx_p (x, strict_p))
6615 return 1;
6617 use_ldrd = (TARGET_LDRD
6618 && (mode == DImode
6619 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6621 if (code == POST_INC || code == PRE_DEC
6622 || ((code == PRE_INC || code == POST_DEC)
6623 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6624 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6626 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6627 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6628 && GET_CODE (XEXP (x, 1)) == PLUS
6629 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6631 /* Thumb-2 only has autoincrement by constant. */
6632 rtx addend = XEXP (XEXP (x, 1), 1);
6633 HOST_WIDE_INT offset;
6635 if (!CONST_INT_P (addend))
6636 return 0;
6638 offset = INTVAL(addend);
6639 if (GET_MODE_SIZE (mode) <= 4)
6640 return (offset > -256 && offset < 256);
6642 return (use_ldrd && offset > -1024 && offset < 1024
6643 && (offset & 3) == 0);
6646 /* After reload constants split into minipools will have addresses
6647 from a LABEL_REF. */
6648 else if (reload_completed
6649 && (code == LABEL_REF
6650 || (code == CONST
6651 && GET_CODE (XEXP (x, 0)) == PLUS
6652 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6653 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6654 return 1;
6656 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6657 return 0;
6659 else if (code == PLUS)
6661 rtx xop0 = XEXP (x, 0);
6662 rtx xop1 = XEXP (x, 1);
6664 return ((arm_address_register_rtx_p (xop0, strict_p)
6665 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6666 || (!strict_p && will_be_in_index_register (xop1))))
6667 || (arm_address_register_rtx_p (xop1, strict_p)
6668 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6671 /* Normally we can assign constant values to target registers without
6672 the help of a constant pool. But there are cases where we have to use a
6673 constant pool, such as:
6674 1) assigning a label to a register;
6675 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6677 A constant pool access of the form:
6678 (set (reg r0) (mem (symbol_ref (".LC0"))))
6679 will cause the use of a literal pool (later, in the function arm_reorg).
6680 So here we mark such a format as invalid; the compiler
6681 will then adjust it into:
6682 (set (reg r0) (symbol_ref (".LC0")))
6683 (set (reg r0) (mem (reg r0))).
6684 No extra register is required, and (mem (reg r0)) won't cause the use
6685 of literal pools. */
6686 else if (arm_disable_literal_pool && code == SYMBOL_REF
6687 && CONSTANT_POOL_ADDRESS_P (x))
6688 return 0;
6690 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6691 && code == SYMBOL_REF
6692 && CONSTANT_POOL_ADDRESS_P (x)
6693 && ! (flag_pic
6694 && symbol_mentioned_p (get_pool_constant (x))
6695 && ! pcrel_constant_p (get_pool_constant (x))))
6696 return 1;
6698 return 0;
6701 /* Return nonzero if INDEX is valid for an address index operand in
6702 ARM state. */
6703 static int
6704 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6705 int strict_p)
6707 HOST_WIDE_INT range;
6708 enum rtx_code code = GET_CODE (index);
6710 /* Standard coprocessor addressing modes. */
6711 if (TARGET_HARD_FLOAT
6712 && TARGET_VFP
6713 && (mode == SFmode || mode == DFmode))
6714 return (code == CONST_INT && INTVAL (index) < 1024
6715 && INTVAL (index) > -1024
6716 && (INTVAL (index) & 3) == 0);
6718 /* For quad modes, we restrict the constant offset to be slightly less
6719 than what the instruction format permits. We do this because for
6720 quad mode moves, we will actually decompose them into two separate
6721 double-mode reads or writes. INDEX must therefore be a valid
6722 (double-mode) offset and so should INDEX+8. */
6723 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6724 return (code == CONST_INT
6725 && INTVAL (index) < 1016
6726 && INTVAL (index) > -1024
6727 && (INTVAL (index) & 3) == 0);
6729 /* We have no such constraint on double mode offsets, so we permit the
6730 full range of the instruction format. */
6731 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6732 return (code == CONST_INT
6733 && INTVAL (index) < 1024
6734 && INTVAL (index) > -1024
6735 && (INTVAL (index) & 3) == 0);
6737 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6738 return (code == CONST_INT
6739 && INTVAL (index) < 1024
6740 && INTVAL (index) > -1024
6741 && (INTVAL (index) & 3) == 0);
6743 if (arm_address_register_rtx_p (index, strict_p)
6744 && (GET_MODE_SIZE (mode) <= 4))
6745 return 1;
6747 if (mode == DImode || mode == DFmode)
6749 if (code == CONST_INT)
6751 HOST_WIDE_INT val = INTVAL (index);
6753 if (TARGET_LDRD)
6754 return val > -256 && val < 256;
6755 else
6756 return val > -4096 && val < 4092;
6759 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6762 if (GET_MODE_SIZE (mode) <= 4
6763 && ! (arm_arch4
6764 && (mode == HImode
6765 || mode == HFmode
6766 || (mode == QImode && outer == SIGN_EXTEND))))
6768 if (code == MULT)
6770 rtx xiop0 = XEXP (index, 0);
6771 rtx xiop1 = XEXP (index, 1);
6773 return ((arm_address_register_rtx_p (xiop0, strict_p)
6774 && power_of_two_operand (xiop1, SImode))
6775 || (arm_address_register_rtx_p (xiop1, strict_p)
6776 && power_of_two_operand (xiop0, SImode)));
6778 else if (code == LSHIFTRT || code == ASHIFTRT
6779 || code == ASHIFT || code == ROTATERT)
6781 rtx op = XEXP (index, 1);
6783 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6784 && CONST_INT_P (op)
6785 && INTVAL (op) > 0
6786 && INTVAL (op) <= 31);
6790 /* For ARM v4 we may be doing a sign-extend operation during the
6791 load. */
6792 if (arm_arch4)
6794 if (mode == HImode
6795 || mode == HFmode
6796 || (outer == SIGN_EXTEND && mode == QImode))
6797 range = 256;
6798 else
6799 range = 4096;
6801 else
6802 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6804 return (code == CONST_INT
6805 && INTVAL (index) < range
6806 && INTVAL (index) > -range);
6809 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6810 index operand. i.e. 1, 2, 4 or 8. */
6811 static bool
6812 thumb2_index_mul_operand (rtx op)
6814 HOST_WIDE_INT val;
6816 if (!CONST_INT_P (op))
6817 return false;
6819 val = INTVAL(op);
6820 return (val == 1 || val == 2 || val == 4 || val == 8);
6823 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6824 static int
6825 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6827 enum rtx_code code = GET_CODE (index);
6829 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6830 /* Standard coprocessor addressing modes. */
6831 if (TARGET_HARD_FLOAT
6832 && TARGET_VFP
6833 && (mode == SFmode || mode == DFmode))
6834 return (code == CONST_INT && INTVAL (index) < 1024
6835 /* Thumb-2 allows only > -256 index range for its core register
6836 load/stores. Since we allow SF/DF in core registers, we have
6837 to use the intersection between -256~4096 (core) and -1024~1024
6838 (coprocessor). */
6839 && INTVAL (index) > -256
6840 && (INTVAL (index) & 3) == 0);
6842 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6844 /* For DImode assume values will usually live in core regs
6845 and only allow LDRD addressing modes. */
6846 if (!TARGET_LDRD || mode != DImode)
6847 return (code == CONST_INT
6848 && INTVAL (index) < 1024
6849 && INTVAL (index) > -1024
6850 && (INTVAL (index) & 3) == 0);
6853 /* For quad modes, we restrict the constant offset to be slightly less
6854 than what the instruction format permits. We do this because for
6855 quad mode moves, we will actually decompose them into two separate
6856 double-mode reads or writes. INDEX must therefore be a valid
6857 (double-mode) offset and so should INDEX+8. */
6858 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6859 return (code == CONST_INT
6860 && INTVAL (index) < 1016
6861 && INTVAL (index) > -1024
6862 && (INTVAL (index) & 3) == 0);
6864 /* We have no such constraint on double mode offsets, so we permit the
6865 full range of the instruction format. */
6866 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6867 return (code == CONST_INT
6868 && INTVAL (index) < 1024
6869 && INTVAL (index) > -1024
6870 && (INTVAL (index) & 3) == 0);
6872 if (arm_address_register_rtx_p (index, strict_p)
6873 && (GET_MODE_SIZE (mode) <= 4))
6874 return 1;
6876 if (mode == DImode || mode == DFmode)
6878 if (code == CONST_INT)
6880 HOST_WIDE_INT val = INTVAL (index);
6881 /* ??? Can we assume ldrd for thumb2? */
6882 /* Thumb-2 ldrd only has reg+const addressing modes. */
6883 /* ldrd supports offsets of +-1020.
6884 However the ldr fallback does not. */
6885 return val > -256 && val < 256 && (val & 3) == 0;
6887 else
6888 return 0;
6891 if (code == MULT)
6893 rtx xiop0 = XEXP (index, 0);
6894 rtx xiop1 = XEXP (index, 1);
6896 return ((arm_address_register_rtx_p (xiop0, strict_p)
6897 && thumb2_index_mul_operand (xiop1))
6898 || (arm_address_register_rtx_p (xiop1, strict_p)
6899 && thumb2_index_mul_operand (xiop0)));
6901 else if (code == ASHIFT)
6903 rtx op = XEXP (index, 1);
6905 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6906 && CONST_INT_P (op)
6907 && INTVAL (op) > 0
6908 && INTVAL (op) <= 3);
6911 return (code == CONST_INT
6912 && INTVAL (index) < 4096
6913 && INTVAL (index) > -256);
6916 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6917 static int
6918 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6920 int regno;
6922 if (!REG_P (x))
6923 return 0;
6925 regno = REGNO (x);
6927 if (strict_p)
6928 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6930 return (regno <= LAST_LO_REGNUM
6931 || regno > LAST_VIRTUAL_REGISTER
6932 || regno == FRAME_POINTER_REGNUM
6933 || (GET_MODE_SIZE (mode) >= 4
6934 && (regno == STACK_POINTER_REGNUM
6935 || regno >= FIRST_PSEUDO_REGISTER
6936 || x == hard_frame_pointer_rtx
6937 || x == arg_pointer_rtx)));
6940 /* Return nonzero if x is a legitimate index register. This is the case
6941 for any base register that can access a QImode object. */
6942 inline static int
6943 thumb1_index_register_rtx_p (rtx x, int strict_p)
6945 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6948 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6950 The AP may be eliminated to either the SP or the FP, so we use the
6951 least common denominator, e.g. SImode, and offsets from 0 to 64.
6953 ??? Verify whether the above is the right approach.
6955 ??? Also, the FP may be eliminated to the SP, so perhaps that
6956 needs special handling also.
6958 ??? Look at how the mips16 port solves this problem. It probably uses
6959 better ways to solve some of these problems.
6961 Although it is not incorrect, we don't accept QImode and HImode
6962 addresses based on the frame pointer or arg pointer until the
6963 reload pass starts. This is so that eliminating such addresses
6964 into stack based ones won't produce impossible code. */
6966 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6968 /* ??? Not clear if this is right. Experiment. */
6969 if (GET_MODE_SIZE (mode) < 4
6970 && !(reload_in_progress || reload_completed)
6971 && (reg_mentioned_p (frame_pointer_rtx, x)
6972 || reg_mentioned_p (arg_pointer_rtx, x)
6973 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6974 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6975 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6976 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6977 return 0;
6979 /* Accept any base register. SP only in SImode or larger. */
6980 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6981 return 1;
6983 /* This is PC relative data before arm_reorg runs. */
6984 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6985 && GET_CODE (x) == SYMBOL_REF
6986 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6987 return 1;
6989 /* This is PC relative data after arm_reorg runs. */
6990 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6991 && reload_completed
6992 && (GET_CODE (x) == LABEL_REF
6993 || (GET_CODE (x) == CONST
6994 && GET_CODE (XEXP (x, 0)) == PLUS
6995 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6996 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6997 return 1;
6999 /* Post-inc indexing only supported for SImode and larger. */
7000 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7001 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7002 return 1;
7004 else if (GET_CODE (x) == PLUS)
7006 /* REG+REG address can be any two index registers. */
7007 /* We disallow FRAME+REG addressing since we know that FRAME
7008 will be replaced with STACK, and SP relative addressing only
7009 permits SP+OFFSET. */
7010 if (GET_MODE_SIZE (mode) <= 4
7011 && XEXP (x, 0) != frame_pointer_rtx
7012 && XEXP (x, 1) != frame_pointer_rtx
7013 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7014 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7015 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7016 return 1;
7018 /* REG+const has 5-7 bit offset for non-SP registers. */
7019 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7020 || XEXP (x, 0) == arg_pointer_rtx)
7021 && CONST_INT_P (XEXP (x, 1))
7022 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7023 return 1;
7025 /* REG+const has 10-bit offset for SP, but only SImode and
7026 larger is supported. */
7027 /* ??? Should probably check for DI/DFmode overflow here
7028 just like GO_IF_LEGITIMATE_OFFSET does. */
7029 else if (REG_P (XEXP (x, 0))
7030 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7031 && GET_MODE_SIZE (mode) >= 4
7032 && CONST_INT_P (XEXP (x, 1))
7033 && INTVAL (XEXP (x, 1)) >= 0
7034 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7035 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7036 return 1;
7038 else if (REG_P (XEXP (x, 0))
7039 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7040 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7041 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7042 && REGNO (XEXP (x, 0))
7043 <= LAST_VIRTUAL_POINTER_REGISTER))
7044 && GET_MODE_SIZE (mode) >= 4
7045 && CONST_INT_P (XEXP (x, 1))
7046 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7047 return 1;
7050 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7051 && GET_MODE_SIZE (mode) == 4
7052 && GET_CODE (x) == SYMBOL_REF
7053 && CONSTANT_POOL_ADDRESS_P (x)
7054 && ! (flag_pic
7055 && symbol_mentioned_p (get_pool_constant (x))
7056 && ! pcrel_constant_p (get_pool_constant (x))))
7057 return 1;
7059 return 0;
7062 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7063 instruction of mode MODE. */
7065 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7067 switch (GET_MODE_SIZE (mode))
7069 case 1:
7070 return val >= 0 && val < 32;
7072 case 2:
7073 return val >= 0 && val < 64 && (val & 1) == 0;
7075 default:
7076 return (val >= 0
7077 && (val + GET_MODE_SIZE (mode)) <= 128
7078 && (val & 3) == 0);
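/* Illustrative sketch of the ranges accepted above (not part of the
   compiler): Thumb-1 byte accesses allow offsets 0-31, halfword accesses
   0-62 in steps of 2, and word or larger accesses any word-aligned
   offset that keeps the whole access within 128 bytes.  */
#if 0
static int
example_thumb_offset_ok (int access_size, int val)
{
  if (access_size == 1)
    return val >= 0 && val < 32;
  if (access_size == 2)
    return val >= 0 && val < 64 && (val & 1) == 0;
  return val >= 0 && val + access_size <= 128 && (val & 3) == 0;
}
#endif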
7082 bool
7083 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7085 if (TARGET_ARM)
7086 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7087 else if (TARGET_THUMB2)
7088 return thumb2_legitimate_address_p (mode, x, strict_p);
7089 else /* if (TARGET_THUMB1) */
7090 return thumb1_legitimate_address_p (mode, x, strict_p);
7093 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7095 Given an rtx X being reloaded into a reg required to be
7096 in class CLASS, return the class of reg to actually use.
7097 In general this is just CLASS, but for the Thumb core registers and
7098 immediate constants we prefer a LO_REGS class or a subset. */
7100 static reg_class_t
7101 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7103 if (TARGET_32BIT)
7104 return rclass;
7105 else
7107 if (rclass == GENERAL_REGS)
7108 return LO_REGS;
7109 else
7110 return rclass;
7114 /* Build the SYMBOL_REF for __tls_get_addr. */
7116 static GTY(()) rtx tls_get_addr_libfunc;
7118 static rtx
7119 get_tls_get_addr (void)
7121 if (!tls_get_addr_libfunc)
7122 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7123 return tls_get_addr_libfunc;
7127 arm_load_tp (rtx target)
7129 if (!target)
7130 target = gen_reg_rtx (SImode);
7132 if (TARGET_HARD_TP)
7134 /* Can return in any reg. */
7135 emit_insn (gen_load_tp_hard (target));
7137 else
7139 /* Always returned in r0. Immediately copy the result into a pseudo,
7140 otherwise other uses of r0 (e.g. setting up function arguments) may
7141 clobber the value. */
7143 rtx tmp;
7145 emit_insn (gen_load_tp_soft ());
7147 tmp = gen_rtx_REG (SImode, 0);
7148 emit_move_insn (target, tmp);
7150 return target;
7153 static rtx
7154 load_tls_operand (rtx x, rtx reg)
7156 rtx tmp;
7158 if (reg == NULL_RTX)
7159 reg = gen_reg_rtx (SImode);
7161 tmp = gen_rtx_CONST (SImode, x);
7163 emit_move_insn (reg, tmp);
7165 return reg;
7168 static rtx
7169 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7171 rtx insns, label, labelno, sum;
7173 gcc_assert (reloc != TLS_DESCSEQ);
7174 start_sequence ();
7176 labelno = GEN_INT (pic_labelno++);
7177 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7178 label = gen_rtx_CONST (VOIDmode, label);
7180 sum = gen_rtx_UNSPEC (Pmode,
7181 gen_rtvec (4, x, GEN_INT (reloc), label,
7182 GEN_INT (TARGET_ARM ? 8 : 4)),
7183 UNSPEC_TLS);
7184 reg = load_tls_operand (sum, reg);
7186 if (TARGET_ARM)
7187 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7188 else
7189 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7191 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7192 LCT_PURE, /* LCT_CONST? */
7193 Pmode, 1, reg, Pmode);
7195 insns = get_insns ();
7196 end_sequence ();
7198 return insns;
7201 static rtx
7202 arm_tls_descseq_addr (rtx x, rtx reg)
7204 rtx labelno = GEN_INT (pic_labelno++);
7205 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7206 rtx sum = gen_rtx_UNSPEC (Pmode,
7207 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7208 gen_rtx_CONST (VOIDmode, label),
7209 GEN_INT (!TARGET_ARM)),
7210 UNSPEC_TLS);
7211 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7213 emit_insn (gen_tlscall (x, labelno));
7214 if (!reg)
7215 reg = gen_reg_rtx (SImode);
7216 else
7217 gcc_assert (REGNO (reg) != 0);
7219 emit_move_insn (reg, reg0);
7221 return reg;
7225 legitimize_tls_address (rtx x, rtx reg)
7227 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7228 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7230 switch (model)
7232 case TLS_MODEL_GLOBAL_DYNAMIC:
7233 if (TARGET_GNU2_TLS)
7235 reg = arm_tls_descseq_addr (x, reg);
7237 tp = arm_load_tp (NULL_RTX);
7239 dest = gen_rtx_PLUS (Pmode, tp, reg);
7241 else
7243 /* Original scheme */
7244 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7245 dest = gen_reg_rtx (Pmode);
7246 emit_libcall_block (insns, dest, ret, x);
7248 return dest;
7250 case TLS_MODEL_LOCAL_DYNAMIC:
7251 if (TARGET_GNU2_TLS)
7253 reg = arm_tls_descseq_addr (x, reg);
7255 tp = arm_load_tp (NULL_RTX);
7257 dest = gen_rtx_PLUS (Pmode, tp, reg);
7259 else
7261 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7263 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7264 share the LDM result with other LD model accesses. */
7265 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7266 UNSPEC_TLS);
7267 dest = gen_reg_rtx (Pmode);
7268 emit_libcall_block (insns, dest, ret, eqv);
7270 /* Load the addend. */
7271 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7272 GEN_INT (TLS_LDO32)),
7273 UNSPEC_TLS);
7274 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7275 dest = gen_rtx_PLUS (Pmode, dest, addend);
7277 return dest;
7279 case TLS_MODEL_INITIAL_EXEC:
7280 labelno = GEN_INT (pic_labelno++);
7281 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7282 label = gen_rtx_CONST (VOIDmode, label);
7283 sum = gen_rtx_UNSPEC (Pmode,
7284 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7285 GEN_INT (TARGET_ARM ? 8 : 4)),
7286 UNSPEC_TLS);
7287 reg = load_tls_operand (sum, reg);
7289 if (TARGET_ARM)
7290 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7291 else if (TARGET_THUMB2)
7292 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7293 else
7295 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7296 emit_move_insn (reg, gen_const_mem (SImode, reg));
7299 tp = arm_load_tp (NULL_RTX);
7301 return gen_rtx_PLUS (Pmode, tp, reg);
7303 case TLS_MODEL_LOCAL_EXEC:
7304 tp = arm_load_tp (NULL_RTX);
7306 reg = gen_rtx_UNSPEC (Pmode,
7307 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7308 UNSPEC_TLS);
7309 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7311 return gen_rtx_PLUS (Pmode, tp, reg);
7313 default:
7314 abort ();
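/* Illustrative sketch (not compiler code) of the simplest case handled
   above, TLS_MODEL_LOCAL_EXEC: the variable's address is the thread
   pointer plus an offset (TLS_LE32) that is fixed at static link time.
   The other models differ only in how that offset, or the final address,
   is obtained (a __tls_get_addr call or GNU2 descriptor for the dynamic
   models, a GOT load for initial-exec).  */
#if 0
static char *
example_tls_local_exec (char *thread_pointer, long tpoff)
{
  return thread_pointer + tpoff;	/* TLS_LE32: link-time constant.  */
}
#endif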
7318 /* Try machine-dependent ways of modifying an illegitimate address
7319 to be legitimate. If we find one, return the new, valid address. */
7321 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7323 if (!TARGET_ARM)
7325 /* TODO: legitimize_address for Thumb2. */
7326 if (TARGET_THUMB2)
7327 return x;
7328 return thumb_legitimize_address (x, orig_x, mode);
7331 if (arm_tls_symbol_p (x))
7332 return legitimize_tls_address (x, NULL_RTX);
7334 if (GET_CODE (x) == PLUS)
7336 rtx xop0 = XEXP (x, 0);
7337 rtx xop1 = XEXP (x, 1);
7339 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7340 xop0 = force_reg (SImode, xop0);
7342 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7343 && !symbol_mentioned_p (xop1))
7344 xop1 = force_reg (SImode, xop1);
7346 if (ARM_BASE_REGISTER_RTX_P (xop0)
7347 && CONST_INT_P (xop1))
7349 HOST_WIDE_INT n, low_n;
7350 rtx base_reg, val;
7351 n = INTVAL (xop1);
7353 /* VFP addressing modes actually allow greater offsets, but for
7354 now we just stick with the lowest common denominator. */
7355 if (mode == DImode
7356 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7358 low_n = n & 0x0f;
7359 n &= ~0x0f;
7360 if (low_n > 4)
7362 n += 16;
7363 low_n -= 16;
7366 else
7368 low_n = ((mode) == TImode ? 0
7369 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7370 n -= low_n;
7373 base_reg = gen_reg_rtx (SImode);
7374 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7375 emit_move_insn (base_reg, val);
7376 x = plus_constant (Pmode, base_reg, low_n);
7378 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7379 x = gen_rtx_PLUS (SImode, xop0, xop1);
7382 /* XXX We don't allow MINUS any more -- see comment in
7383 arm_legitimate_address_outer_p (). */
7384 else if (GET_CODE (x) == MINUS)
7386 rtx xop0 = XEXP (x, 0);
7387 rtx xop1 = XEXP (x, 1);
7389 if (CONSTANT_P (xop0))
7390 xop0 = force_reg (SImode, xop0);
7392 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7393 xop1 = force_reg (SImode, xop1);
7395 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7396 x = gen_rtx_MINUS (SImode, xop0, xop1);
7399 /* Make sure to take full advantage of the pre-indexed addressing mode
7400 with absolute addresses which often allows for the base register to
7401 be factorized for multiple adjacent memory references, and it might
7402 even allow for the mini pool to be avoided entirely. */
7403 else if (CONST_INT_P (x) && optimize > 0)
7405 unsigned int bits;
7406 HOST_WIDE_INT mask, base, index;
7407 rtx base_reg;
7409 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7410 use an 8-bit index. So let's use a 12-bit index for SImode only and
7411 hope that arm_gen_constant will enable ldrb to use more bits. */
7412 bits = (mode == SImode) ? 12 : 8;
7413 mask = (1 << bits) - 1;
7414 base = INTVAL (x) & ~mask;
7415 index = INTVAL (x) & mask;
7416 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7418 /* It'll most probably be more efficient to generate the base
7419 with more bits set and use a negative index instead. */
7420 base |= mask;
7421 index -= mask;
7423 base_reg = force_reg (SImode, GEN_INT (base));
7424 x = plus_constant (Pmode, base_reg, index);
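/* As an illustration (values chosen for this example only): with
   optimization enabled, an SImode load from the absolute address 0x1007f4
   gives bits == 12, mask == 0xfff, base == 0x100000 and index == 0x7f4.
   bit_count (0x100000) == 1, which is not greater than (32 - 12)/2 == 10,
   so the pair is kept as is and we emit the equivalent of
       mov  rB, #0x100000
       ldr  rD, [rB, #0x7f4]
   allowing rB to be shared by neighbouring references to nearby
   addresses.  */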
7427 if (flag_pic)
7429 /* We need to find and carefully transform any SYMBOL and LABEL
7430 references; so go back to the original address expression. */
7431 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7433 if (new_x != orig_x)
7434 x = new_x;
7437 return x;
7441 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7442 to be legitimate. If we find one, return the new, valid address. */
7444 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7446 if (arm_tls_symbol_p (x))
7447 return legitimize_tls_address (x, NULL_RTX);
7449 if (GET_CODE (x) == PLUS
7450 && CONST_INT_P (XEXP (x, 1))
7451 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7452 || INTVAL (XEXP (x, 1)) < 0))
7454 rtx xop0 = XEXP (x, 0);
7455 rtx xop1 = XEXP (x, 1);
7456 HOST_WIDE_INT offset = INTVAL (xop1);
7458 /* Try and fold the offset into a biasing of the base register and
7459 then offsetting that. Don't do this when optimizing for space
7460 since it can cause too many CSEs. */
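/* Worked example of the folding below (illustrative only): for an SImode
   access at base + 300, 300 >= 32 * 4 so the address is rewritten; since
   300 >= 256 we get delta = 300 - (256 - 4) = 48, i.e. tmp = base + 252
   followed by an access at [tmp, #48].  The residual offset 48 is
   word-aligned and fits the 5-bit scaled offset field of the 16-bit
   load/store encodings.  */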
7461 if (optimize_size && offset >= 0
7462 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7464 HOST_WIDE_INT delta;
7466 if (offset >= 256)
7467 delta = offset - (256 - GET_MODE_SIZE (mode));
7468 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7469 delta = 31 * GET_MODE_SIZE (mode);
7470 else
7471 delta = offset & (~31 * GET_MODE_SIZE (mode));
7473 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7474 NULL_RTX);
7475 x = plus_constant (Pmode, xop0, delta);
7477 else if (offset < 0 && offset > -256)
7478 /* Small negative offsets are best done with a subtract before the
7479 dereference, forcing these into a register normally takes two
7480 instructions. */
7481 x = force_operand (x, NULL_RTX);
7482 else
7484 /* For the remaining cases, force the constant into a register. */
7485 xop1 = force_reg (SImode, xop1);
7486 x = gen_rtx_PLUS (SImode, xop0, xop1);
7489 else if (GET_CODE (x) == PLUS
7490 && s_register_operand (XEXP (x, 1), SImode)
7491 && !s_register_operand (XEXP (x, 0), SImode))
7493 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7495 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7498 if (flag_pic)
7500 /* We need to find and carefully transform any SYMBOL and LABEL
7501 references; so go back to the original address expression. */
7502 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7504 if (new_x != orig_x)
7505 x = new_x;
7508 return x;
7511 bool
7512 arm_legitimize_reload_address (rtx *p,
7513 enum machine_mode mode,
7514 int opnum, int type,
7515 int ind_levels ATTRIBUTE_UNUSED)
7517 /* We must recognize output that we have already generated ourselves. */
7518 if (GET_CODE (*p) == PLUS
7519 && GET_CODE (XEXP (*p, 0)) == PLUS
7520 && REG_P (XEXP (XEXP (*p, 0), 0))
7521 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7522 && CONST_INT_P (XEXP (*p, 1)))
7524 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7525 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7526 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7527 return true;
7530 if (GET_CODE (*p) == PLUS
7531 && REG_P (XEXP (*p, 0))
7532 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7533 /* If the base register is equivalent to a constant, let the generic
7534 code handle it. Otherwise we will run into problems if a future
7535 reload pass decides to rematerialize the constant. */
7536 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7537 && CONST_INT_P (XEXP (*p, 1)))
7539 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7540 HOST_WIDE_INT low, high;
7542 /* Detect coprocessor load/stores. */
7543 bool coproc_p = ((TARGET_HARD_FLOAT
7544 && TARGET_VFP
7545 && (mode == SFmode || mode == DFmode))
7546 || (TARGET_REALLY_IWMMXT
7547 && VALID_IWMMXT_REG_MODE (mode))
7548 || (TARGET_NEON
7549 && (VALID_NEON_DREG_MODE (mode)
7550 || VALID_NEON_QREG_MODE (mode))));
7552 /* For some conditions, bail out when the low two bits are non-zero. */
7553 if ((val & 0x3) != 0
7554 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7555 && (coproc_p
7556 /* For DI, and DF under soft-float: */
7557 || ((mode == DImode || mode == DFmode)
7558 /* Without ldrd, we use stm/ldm, which does not
7559 fare well with unaligned bits. */
7560 && (! TARGET_LDRD
7561 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7562 || TARGET_THUMB2))))
7563 return false;
7565 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7566 of which the (reg+high) gets turned into a reload add insn,
7567 we try to decompose the index into high/low values that can often
7568 also lead to better reload CSE.
7569 For example:
7570 ldr r0, [r2, #4100] // Offset too large
7571 ldr r1, [r2, #4104] // Offset too large
7573 is best reloaded as:
7574 add t1, r2, #4096
7575 ldr r0, [t1, #4]
7576 add t2, r2, #4096
7577 ldr r1, [t2, #8]
7579 which post-reload CSE can simplify in most cases to eliminate the
7580 second add instruction:
7581 add t1, r2, #4096
7582 ldr r0, [t1, #4]
7583 ldr r1, [t1, #8]
7585 The idea here is that we want to split out the bits of the constant
7586 as a mask, rather than as subtracting the maximum offset that the
7587 respective type of load/store used can handle.
7589 A negative low part can still be useful even when the overall offset
7590 is positive; sometimes this leads to an immediate that can be
7591 constructed with fewer instructions.
7592 For example:
7593 ldr r0, [r2, #0x3FFFFC]
7595 This is best reloaded as:
7596 add t1, r2, #0x400000
7597 ldr r0, [t1, #-4]
7599 The trick for spotting this for a load insn with N bits of offset
7600 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7601 negative offset that is going to make bit N and all the bits below
7602 it become zero in the remainder part.
7604 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7605 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7606 used in most cases of ARM load/store instructions. */
7608 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7609 (((VAL) & ((1 << (N)) - 1)) \
7610 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7611 : 0)
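/* As a concrete check against the example above: for VAL == 0x3FFFFC and
   N == 12 the low twelve bits are non-zero, so the macro yields
     ((0x3FFFFC & 0x1fff) ^ 0x1000) - 0x1000
       == (0x1ffc ^ 0x1000) - 0x1000
       == 0xffc - 0x1000
       == -4,
   and the high part becomes 0x3FFFFC - (-4) == 0x400000, which is exactly
   the add/ldr pair shown in the comment.  */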
7613 if (coproc_p)
7615 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7617 /* NEON quad-word load/stores are made of two double-word accesses,
7618 so the valid index range is reduced by 8. Treat as 9-bit range if
7619 we go over it. */
7620 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7621 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7623 else if (GET_MODE_SIZE (mode) == 8)
7625 if (TARGET_LDRD)
7626 low = (TARGET_THUMB2
7627 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7628 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7629 else
7630 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7631 to access doublewords. The supported load/store offsets are
7632 -8, -4, and 4, which we try to produce here. */
7633 low = ((val & 0xf) ^ 0x8) - 0x8;
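/* The expression above sign-extends the low four bits, so an aligned
   residue of 0xc becomes -4 and 0x8 becomes -8, matching the offsets
   listed in the comment.  */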
7635 else if (GET_MODE_SIZE (mode) < 8)
7637 /* NEON element load/stores do not have an offset. */
7638 if (TARGET_NEON_FP16 && mode == HFmode)
7639 return false;
7641 if (TARGET_THUMB2)
7643 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7644 Try the wider 12-bit range first, and re-try if the result
7645 is out of range. */
7646 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7647 if (low < -255)
7648 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
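/* For example (illustrative only), val == 0x1e04 first gives
   low == 0xe04 - 0x1000 == -508, which is below -255, so the 8-bit retry
   is used instead: low == 4, leaving high == 0x1e00 for the reload add.  */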
7650 else
7652 if (mode == HImode || mode == HFmode)
7654 if (arm_arch4)
7655 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7656 else
7658 /* The storehi/movhi_bytes fallbacks can use only
7659 [-4094,+4094] of the full ldrb/strb index range. */
7660 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7661 if (low == 4095 || low == -4095)
7662 return false;
7665 else
7666 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7669 else
7670 return false;
7672 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7673 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7674 - (unsigned HOST_WIDE_INT) 0x80000000);
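/* The mask/xor/subtract above is the usual idiom for sign-extending
   (val - low) from 32 bits, so the overflow check below also works on
   hosts where HOST_WIDE_INT is wider than 32 bits.  */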
7675 /* Check for overflow or zero. */
7676 if (low == 0 || high == 0 || (high + low != val))
7677 return false;
7679 /* Reload the high part into a base reg; leave the low part
7680 in the mem.
7681 Note that replacing this gen_rtx_PLUS with plus_constant is
7682 wrong in this case because we rely on the
7683 (plus (plus reg c1) c2) structure being preserved so that
7684 XEXP (*p, 0) in push_reload below uses the correct term. */
7685 *p = gen_rtx_PLUS (GET_MODE (*p),
7686 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7687 GEN_INT (high)),
7688 GEN_INT (low));
7689 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7690 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7691 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7692 return true;
7695 return false;
7699 thumb_legitimize_reload_address (rtx *x_p,
7700 enum machine_mode mode,
7701 int opnum, int type,
7702 int ind_levels ATTRIBUTE_UNUSED)
7704 rtx x = *x_p;
7706 if (GET_CODE (x) == PLUS
7707 && GET_MODE_SIZE (mode) < 4
7708 && REG_P (XEXP (x, 0))
7709 && XEXP (x, 0) == stack_pointer_rtx
7710 && CONST_INT_P (XEXP (x, 1))
7711 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7713 rtx orig_x = x;
7715 x = copy_rtx (x);
7716 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7717 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7718 return x;
7721 /* If both registers are hi-regs, then it's better to reload the
7722 entire expression rather than each register individually. That
7723 only requires one reload register rather than two. */
7724 if (GET_CODE (x) == PLUS
7725 && REG_P (XEXP (x, 0))
7726 && REG_P (XEXP (x, 1))
7727 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7728 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7730 rtx orig_x = x;
7732 x = copy_rtx (x);
7733 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7734 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7735 return x;
7738 return NULL;
7741 /* Test for various thread-local symbols. */
7743 /* Return TRUE if X is a thread-local symbol. */
7745 static bool
7746 arm_tls_symbol_p (rtx x)
7748 if (! TARGET_HAVE_TLS)
7749 return false;
7751 if (GET_CODE (x) != SYMBOL_REF)
7752 return false;
7754 return SYMBOL_REF_TLS_MODEL (x) != 0;
7757 /* Helper for arm_tls_referenced_p. */
7759 static int
7760 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7762 if (GET_CODE (*x) == SYMBOL_REF)
7763 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7765 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7766 TLS offsets, not real symbol references. */
7767 if (GET_CODE (*x) == UNSPEC
7768 && XINT (*x, 1) == UNSPEC_TLS)
7769 return -1;
7771 return 0;
7774 /* Return TRUE if X contains any TLS symbol references. */
7776 bool
7777 arm_tls_referenced_p (rtx x)
7779 if (! TARGET_HAVE_TLS)
7780 return false;
7782 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7785 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7787 On the ARM, allow any integer (invalid ones are removed later by insn
7788 patterns), nice doubles and symbol_refs which refer to the function's
7789 constant pool XXX.
7791 When generating pic allow anything. */
7793 static bool
7794 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7796 /* At present, we have no support for Neon structure constants, so forbid
7797 them here. It might be possible to handle simple cases like 0 and -1
7798 in future. */
7799 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7800 return false;
7802 return flag_pic || !label_mentioned_p (x);
7805 static bool
7806 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7808 return (CONST_INT_P (x)
7809 || CONST_DOUBLE_P (x)
7810 || CONSTANT_ADDRESS_P (x)
7811 || flag_pic);
7814 static bool
7815 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7817 return (!arm_cannot_force_const_mem (mode, x)
7818 && (TARGET_32BIT
7819 ? arm_legitimate_constant_p_1 (mode, x)
7820 : thumb_legitimate_constant_p (mode, x)));
7823 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7825 static bool
7826 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7828 rtx base, offset;
7830 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7832 split_const (x, &base, &offset);
7833 if (GET_CODE (base) == SYMBOL_REF
7834 && !offset_within_block_p (base, INTVAL (offset)))
7835 return true;
7837 return arm_tls_referenced_p (x);
7840 #define REG_OR_SUBREG_REG(X) \
7841 (REG_P (X) \
7842 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7844 #define REG_OR_SUBREG_RTX(X) \
7845 (REG_P (X) ? (X) : SUBREG_REG (X))
7847 static inline int
7848 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7850 enum machine_mode mode = GET_MODE (x);
7851 int total, words;
7853 switch (code)
7855 case ASHIFT:
7856 case ASHIFTRT:
7857 case LSHIFTRT:
7858 case ROTATERT:
7859 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7861 case PLUS:
7862 case MINUS:
7863 case COMPARE:
7864 case NEG:
7865 case NOT:
7866 return COSTS_N_INSNS (1);
7868 case MULT:
7869 if (CONST_INT_P (XEXP (x, 1)))
7871 int cycles = 0;
7872 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
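/* The loop below counts how many 2-bit shifts it takes for the constant
   to become zero; e.g. for i == 0x65 it iterates four times, so the
   multiply is costed as COSTS_N_INSNS (2) + 4.  */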
7874 while (i)
7876 i >>= 2;
7877 cycles++;
7879 return COSTS_N_INSNS (2) + cycles;
7881 return COSTS_N_INSNS (1) + 16;
7883 case SET:
7884 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7885 the mode. */
7886 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7887 return (COSTS_N_INSNS (words)
7888 + 4 * ((MEM_P (SET_SRC (x)))
7889 + MEM_P (SET_DEST (x))));
7891 case CONST_INT:
7892 if (outer == SET)
7894 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7895 return 0;
7896 if (thumb_shiftable_const (INTVAL (x)))
7897 return COSTS_N_INSNS (2);
7898 return COSTS_N_INSNS (3);
7900 else if ((outer == PLUS || outer == COMPARE)
7901 && INTVAL (x) < 256 && INTVAL (x) > -256)
7902 return 0;
7903 else if ((outer == IOR || outer == XOR || outer == AND)
7904 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7905 return COSTS_N_INSNS (1);
7906 else if (outer == AND)
7908 int i;
7909 /* This duplicates the tests in the andsi3 expander. */
7910 for (i = 9; i <= 31; i++)
7911 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7912 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7913 return COSTS_N_INSNS (2);
7915 else if (outer == ASHIFT || outer == ASHIFTRT
7916 || outer == LSHIFTRT)
7917 return 0;
7918 return COSTS_N_INSNS (2);
7920 case CONST:
7921 case CONST_DOUBLE:
7922 case LABEL_REF:
7923 case SYMBOL_REF:
7924 return COSTS_N_INSNS (3);
7926 case UDIV:
7927 case UMOD:
7928 case DIV:
7929 case MOD:
7930 return 100;
7932 case TRUNCATE:
7933 return 99;
7935 case AND:
7936 case XOR:
7937 case IOR:
7938 /* XXX guess. */
7939 return 8;
7941 case MEM:
7942 /* XXX another guess. */
7943 /* Memory costs quite a lot for the first word, but subsequent words
7944 load at the equivalent of a single insn each. */
7945 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7946 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7947 ? 4 : 0));
7949 case IF_THEN_ELSE:
7950 /* XXX a guess. */
7951 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7952 return 14;
7953 return 2;
7955 case SIGN_EXTEND:
7956 case ZERO_EXTEND:
7957 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7958 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7960 if (mode == SImode)
7961 return total;
7963 if (arm_arch6)
7964 return total + COSTS_N_INSNS (1);
7966 /* Assume a two-shift sequence. Increase the cost slightly so
7967 we prefer actual shifts over an extend operation. */
7968 return total + 1 + COSTS_N_INSNS (2);
7970 default:
7971 return 99;
7975 static inline bool
7976 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7978 enum machine_mode mode = GET_MODE (x);
7979 enum rtx_code subcode;
7980 rtx operand;
7981 enum rtx_code code = GET_CODE (x);
7982 *total = 0;
7984 switch (code)
7986 case MEM:
7987 /* Memory costs quite a lot for the first word, but subsequent words
7988 load at the equivalent of a single insn each. */
7989 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7990 return true;
7992 case DIV:
7993 case MOD:
7994 case UDIV:
7995 case UMOD:
7996 if (TARGET_HARD_FLOAT && mode == SFmode)
7997 *total = COSTS_N_INSNS (2);
7998 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7999 *total = COSTS_N_INSNS (4);
8000 else
8001 *total = COSTS_N_INSNS (20);
8002 return false;
8004 case ROTATE:
8005 if (REG_P (XEXP (x, 1)))
8006 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8007 else if (!CONST_INT_P (XEXP (x, 1)))
8008 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8010 /* Fall through */
8011 case ROTATERT:
8012 if (mode != SImode)
8014 *total += COSTS_N_INSNS (4);
8015 return true;
8018 /* Fall through */
8019 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8020 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8021 if (mode == DImode)
8023 *total += COSTS_N_INSNS (3);
8024 return true;
8027 *total += COSTS_N_INSNS (1);
8028 /* Increase the cost of complex shifts because they aren't any faster,
8029 and reduce dual issue opportunities. */
8030 if (arm_tune_cortex_a9
8031 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8032 ++*total;
8034 return true;
8036 case MINUS:
8037 if (mode == DImode)
8039 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8040 if (CONST_INT_P (XEXP (x, 0))
8041 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8043 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8044 return true;
8047 if (CONST_INT_P (XEXP (x, 1))
8048 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8050 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8051 return true;
8054 return false;
8057 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8059 if (TARGET_HARD_FLOAT
8060 && (mode == SFmode
8061 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8063 *total = COSTS_N_INSNS (1);
8064 if (CONST_DOUBLE_P (XEXP (x, 0))
8065 && arm_const_double_rtx (XEXP (x, 0)))
8067 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8068 return true;
8071 if (CONST_DOUBLE_P (XEXP (x, 1))
8072 && arm_const_double_rtx (XEXP (x, 1)))
8074 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8075 return true;
8078 return false;
8080 *total = COSTS_N_INSNS (20);
8081 return false;
8084 *total = COSTS_N_INSNS (1);
8085 if (CONST_INT_P (XEXP (x, 0))
8086 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8088 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8089 return true;
8092 subcode = GET_CODE (XEXP (x, 1));
8093 if (subcode == ASHIFT || subcode == ASHIFTRT
8094 || subcode == LSHIFTRT
8095 || subcode == ROTATE || subcode == ROTATERT)
8097 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8098 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8099 return true;
8102 /* A shift as a part of RSB costs no more than RSB itself. */
8103 if (GET_CODE (XEXP (x, 0)) == MULT
8104 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8106 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8107 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8108 return true;
8111 if (subcode == MULT
8112 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8114 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8115 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8116 return true;
8119 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8120 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8122 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8123 if (REG_P (XEXP (XEXP (x, 1), 0))
8124 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8125 *total += COSTS_N_INSNS (1);
8127 return true;
8130 /* Fall through */
8132 case PLUS:
8133 if (code == PLUS && arm_arch6 && mode == SImode
8134 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8135 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8137 *total = COSTS_N_INSNS (1);
8138 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8139 0, speed);
8140 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8141 return true;
8144 /* MLA: All arguments must be registers. We filter out
8145 multiplication by a power of two, so that we fall down into
8146 the code below. */
8147 if (GET_CODE (XEXP (x, 0)) == MULT
8148 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8150 /* The cost comes from the cost of the multiply. */
8151 return false;
8154 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8156 if (TARGET_HARD_FLOAT
8157 && (mode == SFmode
8158 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8160 *total = COSTS_N_INSNS (1);
8161 if (CONST_DOUBLE_P (XEXP (x, 1))
8162 && arm_const_double_rtx (XEXP (x, 1)))
8164 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8165 return true;
8168 return false;
8171 *total = COSTS_N_INSNS (20);
8172 return false;
8175 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8176 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8178 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8179 if (REG_P (XEXP (XEXP (x, 0), 0))
8180 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8181 *total += COSTS_N_INSNS (1);
8182 return true;
8185 /* Fall through */
8187 case AND: case XOR: case IOR:
8189 /* Normally the frame registers will be split into reg+const during
8190 reload, so it is a bad idea to combine them with other instructions,
8191 since then they might not be moved outside of loops. As a compromise
8192 we allow integration with ops that have a constant as their second
8193 operand. */
8194 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8195 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8196 && !CONST_INT_P (XEXP (x, 1)))
8197 *total = COSTS_N_INSNS (1);
8199 if (mode == DImode)
8201 *total += COSTS_N_INSNS (2);
8202 if (CONST_INT_P (XEXP (x, 1))
8203 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8205 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8206 return true;
8209 return false;
8212 *total += COSTS_N_INSNS (1);
8213 if (CONST_INT_P (XEXP (x, 1))
8214 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8216 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8217 return true;
8219 subcode = GET_CODE (XEXP (x, 0));
8220 if (subcode == ASHIFT || subcode == ASHIFTRT
8221 || subcode == LSHIFTRT
8222 || subcode == ROTATE || subcode == ROTATERT)
8224 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8225 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8226 return true;
8229 if (subcode == MULT
8230 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8232 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8233 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8234 return true;
8237 if (subcode == UMIN || subcode == UMAX
8238 || subcode == SMIN || subcode == SMAX)
8240 *total = COSTS_N_INSNS (3);
8241 return true;
8244 return false;
8246 case MULT:
8247 /* This should have been handled by the CPU specific routines. */
8248 gcc_unreachable ();
8250 case TRUNCATE:
8251 if (arm_arch3m && mode == SImode
8252 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8253 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8254 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8255 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8256 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8257 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8259 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8260 return true;
8262 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8263 return false;
8265 case NEG:
8266 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8268 if (TARGET_HARD_FLOAT
8269 && (mode == SFmode
8270 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8272 *total = COSTS_N_INSNS (1);
8273 return false;
8275 *total = COSTS_N_INSNS (2);
8276 return false;
8279 /* Fall through */
8280 case NOT:
8281 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8282 if (mode == SImode && code == NOT)
8284 subcode = GET_CODE (XEXP (x, 0));
8285 if (subcode == ASHIFT || subcode == ASHIFTRT
8286 || subcode == LSHIFTRT
8287 || subcode == ROTATE || subcode == ROTATERT
8288 || (subcode == MULT
8289 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8291 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8292 /* Register shifts cost an extra cycle. */
8293 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8294 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8295 subcode, 1, speed);
8296 return true;
8300 return false;
8302 case IF_THEN_ELSE:
8303 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8305 *total = COSTS_N_INSNS (4);
8306 return true;
8309 operand = XEXP (x, 0);
8311 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8312 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8313 && REG_P (XEXP (operand, 0))
8314 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8315 *total += COSTS_N_INSNS (1);
8316 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8317 + rtx_cost (XEXP (x, 2), code, 2, speed));
8318 return true;
8320 case NE:
8321 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8323 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8324 return true;
8326 goto scc_insn;
8328 case GE:
8329 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8330 && mode == SImode && XEXP (x, 1) == const0_rtx)
8332 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8333 return true;
8335 goto scc_insn;
8337 case LT:
8338 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8339 && mode == SImode && XEXP (x, 1) == const0_rtx)
8341 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8342 return true;
8344 goto scc_insn;
8346 case EQ:
8347 case GT:
8348 case LE:
8349 case GEU:
8350 case LTU:
8351 case GTU:
8352 case LEU:
8353 case UNORDERED:
8354 case ORDERED:
8355 case UNEQ:
8356 case UNGE:
8357 case UNLT:
8358 case UNGT:
8359 case UNLE:
8360 scc_insn:
8361 /* SCC insns. If the comparison has already been
8362 performed, they cost 2 instructions. Otherwise they need
8363 an additional comparison before them. */
8364 *total = COSTS_N_INSNS (2);
8365 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8367 return true;
8370 /* Fall through */
8371 case COMPARE:
8372 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8374 *total = 0;
8375 return true;
8378 *total += COSTS_N_INSNS (1);
8379 if (CONST_INT_P (XEXP (x, 1))
8380 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8382 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8383 return true;
8386 subcode = GET_CODE (XEXP (x, 0));
8387 if (subcode == ASHIFT || subcode == ASHIFTRT
8388 || subcode == LSHIFTRT
8389 || subcode == ROTATE || subcode == ROTATERT)
8391 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8392 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8393 return true;
8396 if (subcode == MULT
8397 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8399 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8400 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8401 return true;
8404 return false;
8406 case UMIN:
8407 case UMAX:
8408 case SMIN:
8409 case SMAX:
8410 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8411 if (!CONST_INT_P (XEXP (x, 1))
8412 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8413 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8414 return true;
8416 case ABS:
8417 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8419 if (TARGET_HARD_FLOAT
8420 && (mode == SFmode
8421 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8423 *total = COSTS_N_INSNS (1);
8424 return false;
8426 *total = COSTS_N_INSNS (20);
8427 return false;
8429 *total = COSTS_N_INSNS (1);
8430 if (mode == DImode)
8431 *total += COSTS_N_INSNS (3);
8432 return false;
8434 case SIGN_EXTEND:
8435 case ZERO_EXTEND:
8436 *total = 0;
8437 if (GET_MODE_CLASS (mode) == MODE_INT)
8439 rtx op = XEXP (x, 0);
8440 enum machine_mode opmode = GET_MODE (op);
8442 if (mode == DImode)
8443 *total += COSTS_N_INSNS (1);
8445 if (opmode != SImode)
8447 if (MEM_P (op))
8449 /* If !arm_arch4, we use one of the extendhisi2_mem
8450 or movhi_bytes patterns for HImode. For a QImode
8451 sign extension, we first zero-extend from memory
8452 and then perform a shift sequence. */
8453 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8454 *total += COSTS_N_INSNS (2);
8456 else if (arm_arch6)
8457 *total += COSTS_N_INSNS (1);
8459 /* We don't have the necessary insn, so we need to perform some
8460 other operation. */
8461 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8462 /* An and with constant 255. */
8463 *total += COSTS_N_INSNS (1);
8464 else
8465 /* A shift sequence. Increase costs slightly to avoid
8466 combining two shifts into an extend operation. */
8467 *total += COSTS_N_INSNS (2) + 1;
8470 return false;
8473 switch (GET_MODE (XEXP (x, 0)))
8475 case V8QImode:
8476 case V4HImode:
8477 case V2SImode:
8478 case V4QImode:
8479 case V2HImode:
8480 *total = COSTS_N_INSNS (1);
8481 return false;
8483 default:
8484 gcc_unreachable ();
8486 gcc_unreachable ();
8488 case ZERO_EXTRACT:
8489 case SIGN_EXTRACT:
8490 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8491 return true;
8493 case CONST_INT:
8494 if (const_ok_for_arm (INTVAL (x))
8495 || const_ok_for_arm (~INTVAL (x)))
8496 *total = COSTS_N_INSNS (1);
8497 else
8498 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8499 INTVAL (x), NULL_RTX,
8500 NULL_RTX, 0, 0));
8501 return true;
8503 case CONST:
8504 case LABEL_REF:
8505 case SYMBOL_REF:
8506 *total = COSTS_N_INSNS (3);
8507 return true;
8509 case HIGH:
8510 *total = COSTS_N_INSNS (1);
8511 return true;
8513 case LO_SUM:
8514 *total = COSTS_N_INSNS (1);
8515 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8516 return true;
8518 case CONST_DOUBLE:
8519 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8520 && (mode == SFmode || !TARGET_VFP_SINGLE))
8521 *total = COSTS_N_INSNS (1);
8522 else
8523 *total = COSTS_N_INSNS (4);
8524 return true;
8526 case SET:
8527 /* The vec_extract patterns accept memory operands that require an
8528 address reload. Account for the cost of that reload to give the
8529 auto-inc-dec pass an incentive to try to replace them. */
8530 if (TARGET_NEON && MEM_P (SET_DEST (x))
8531 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8533 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8534 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8535 *total += COSTS_N_INSNS (1);
8536 return true;
8538 /* Likewise for the vec_set patterns. */
8539 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8540 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8541 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8543 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8544 *total = rtx_cost (mem, code, 0, speed);
8545 if (!neon_vector_mem_operand (mem, 2, true))
8546 *total += COSTS_N_INSNS (1);
8547 return true;
8549 return false;
8551 case UNSPEC:
8552 /* We cost this as high as our memory costs to allow this to
8553 be hoisted from loops. */
8554 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8556 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8558 return true;
8560 case CONST_VECTOR:
8561 if (TARGET_NEON
8562 && TARGET_HARD_FLOAT
8563 && outer == SET
8564 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8565 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8566 *total = COSTS_N_INSNS (1);
8567 else
8568 *total = COSTS_N_INSNS (4);
8569 return true;
8571 default:
8572 *total = COSTS_N_INSNS (4);
8573 return false;
8577 /* Estimates the size cost of thumb1 instructions.
8578 For now most of the code is copied from thumb1_rtx_costs. We need more
8579 finer-grained tuning when we have more related test cases. */
8580 static inline int
8581 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8583 enum machine_mode mode = GET_MODE (x);
8584 int words;
8586 switch (code)
8588 case ASHIFT:
8589 case ASHIFTRT:
8590 case LSHIFTRT:
8591 case ROTATERT:
8592 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8594 case PLUS:
8595 case MINUS:
8596 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
8597 shiftsub1 patterns generated by RTL expansion, especially for the
8598 expansion of multiplication. */
8599 if ((GET_CODE (XEXP (x, 0)) == MULT
8600 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8601 || (GET_CODE (XEXP (x, 1)) == MULT
8602 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8603 return COSTS_N_INSNS (2);
8604 /* On purpose fall through for normal RTX. */
8605 case COMPARE:
8606 case NEG:
8607 case NOT:
8608 return COSTS_N_INSNS (1);
8610 case MULT:
8611 if (CONST_INT_P (XEXP (x, 1)))
8613 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8614 into a register first. */
8615 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8616 return COSTS_N_INSNS (1) + const_size;
8618 return COSTS_N_INSNS (1);
8620 case SET:
8621 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8622 the mode. */
8623 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8624 return (COSTS_N_INSNS (words)
8625 + 4 * ((MEM_P (SET_SRC (x)))
8626 + MEM_P (SET_DEST (x))));
8628 case CONST_INT:
8629 if (outer == SET)
8631 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8632 return COSTS_N_INSNS (1);
8633 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8634 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8635 return COSTS_N_INSNS (2);
8636 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8637 if (thumb_shiftable_const (INTVAL (x)))
8638 return COSTS_N_INSNS (2);
8639 return COSTS_N_INSNS (3);
8641 else if ((outer == PLUS || outer == COMPARE)
8642 && INTVAL (x) < 256 && INTVAL (x) > -256)
8643 return 0;
8644 else if ((outer == IOR || outer == XOR || outer == AND)
8645 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8646 return COSTS_N_INSNS (1);
8647 else if (outer == AND)
8649 int i;
8650 /* This duplicates the tests in the andsi3 expander. */
8651 for (i = 9; i <= 31; i++)
8652 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8653 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8654 return COSTS_N_INSNS (2);
8656 else if (outer == ASHIFT || outer == ASHIFTRT
8657 || outer == LSHIFTRT)
8658 return 0;
8659 return COSTS_N_INSNS (2);
8661 case CONST:
8662 case CONST_DOUBLE:
8663 case LABEL_REF:
8664 case SYMBOL_REF:
8665 return COSTS_N_INSNS (3);
8667 case UDIV:
8668 case UMOD:
8669 case DIV:
8670 case MOD:
8671 return 100;
8673 case TRUNCATE:
8674 return 99;
8676 case AND:
8677 case XOR:
8678 case IOR:
8679 /* XXX guess. */
8680 return 8;
8682 case MEM:
8683 /* XXX another guess. */
8684 /* Memory costs quite a lot for the first word, but subsequent words
8685 load at the equivalent of a single insn each. */
8686 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8687 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8688 ? 4 : 0));
8690 case IF_THEN_ELSE:
8691 /* XXX a guess. */
8692 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8693 return 14;
8694 return 2;
8696 case ZERO_EXTEND:
8697 /* XXX still guessing. */
8698 switch (GET_MODE (XEXP (x, 0)))
8700 case QImode:
8701 return (1 + (mode == DImode ? 4 : 0)
8702 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8704 case HImode:
8705 return (4 + (mode == DImode ? 4 : 0)
8706 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8708 case SImode:
8709 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8711 default:
8712 return 99;
8715 default:
8716 return 99;
8720 /* RTX costs when optimizing for size. */
8721 static bool
8722 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8723 int *total)
8725 enum machine_mode mode = GET_MODE (x);
8726 if (TARGET_THUMB1)
8728 *total = thumb1_size_rtx_costs (x, code, outer_code);
8729 return true;
8732 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8733 switch (code)
8735 case MEM:
8736 /* A memory access costs 1 insn if the mode is small, or the address is
8737 a single register, otherwise it costs one insn per word. */
8738 if (REG_P (XEXP (x, 0)))
8739 *total = COSTS_N_INSNS (1);
8740 else if (flag_pic
8741 && GET_CODE (XEXP (x, 0)) == PLUS
8742 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8743 /* This will be split into two instructions.
8744 See arm.md:calculate_pic_address. */
8745 *total = COSTS_N_INSNS (2);
8746 else
8747 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8748 return true;
8750 case DIV:
8751 case MOD:
8752 case UDIV:
8753 case UMOD:
8754 /* Needs a libcall, so it costs about this. */
8755 *total = COSTS_N_INSNS (2);
8756 return false;
8758 case ROTATE:
8759 if (mode == SImode && REG_P (XEXP (x, 1)))
8761 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8762 return true;
8764 /* Fall through */
8765 case ROTATERT:
8766 case ASHIFT:
8767 case LSHIFTRT:
8768 case ASHIFTRT:
8769 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8771 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8772 return true;
8774 else if (mode == SImode)
8776 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8777 /* Slightly disparage register shifts, but not by much. */
8778 if (!CONST_INT_P (XEXP (x, 1)))
8779 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8780 return true;
8783 /* Needs a libcall. */
8784 *total = COSTS_N_INSNS (2);
8785 return false;
8787 case MINUS:
8788 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8789 && (mode == SFmode || !TARGET_VFP_SINGLE))
8791 *total = COSTS_N_INSNS (1);
8792 return false;
8795 if (mode == SImode)
8797 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8798 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8800 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8801 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8802 || subcode1 == ROTATE || subcode1 == ROTATERT
8803 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8804 || subcode1 == ASHIFTRT)
8806 /* It's just the cost of the two operands. */
8807 *total = 0;
8808 return false;
8811 *total = COSTS_N_INSNS (1);
8812 return false;
8815 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8816 return false;
8818 case PLUS:
8819 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8820 && (mode == SFmode || !TARGET_VFP_SINGLE))
8822 *total = COSTS_N_INSNS (1);
8823 return false;
8826 /* A shift as a part of ADD costs nothing. */
8827 if (GET_CODE (XEXP (x, 0)) == MULT
8828 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8830 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8831 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8832 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8833 return true;
8836 /* Fall through */
8837 case AND: case XOR: case IOR:
8838 if (mode == SImode)
8840 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8842 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8843 || subcode == LSHIFTRT || subcode == ASHIFTRT
8844 || (code == AND && subcode == NOT))
8846 /* It's just the cost of the two operands. */
8847 *total = 0;
8848 return false;
8852 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8853 return false;
8855 case MULT:
8856 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8857 return false;
8859 case NEG:
8860 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8861 && (mode == SFmode || !TARGET_VFP_SINGLE))
8863 *total = COSTS_N_INSNS (1);
8864 return false;
8867 /* Fall through */
8868 case NOT:
8869 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8871 return false;
8873 case IF_THEN_ELSE:
8874 *total = 0;
8875 return false;
8877 case COMPARE:
8878 if (cc_register (XEXP (x, 0), VOIDmode))
8879 * total = 0;
8880 else
8881 *total = COSTS_N_INSNS (1);
8882 return false;
8884 case ABS:
8885 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8886 && (mode == SFmode || !TARGET_VFP_SINGLE))
8887 *total = COSTS_N_INSNS (1);
8888 else
8889 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8890 return false;
8892 case SIGN_EXTEND:
8893 case ZERO_EXTEND:
8894 return arm_rtx_costs_1 (x, outer_code, total, 0);
8896 case CONST_INT:
8897 if (const_ok_for_arm (INTVAL (x)))
8898 /* A multiplication by a constant requires another instruction
8899 to load the constant to a register. */
8900 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8901 ? 1 : 0);
8902 else if (const_ok_for_arm (~INTVAL (x)))
8903 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8904 else if (const_ok_for_arm (-INTVAL (x)))
8906 if (outer_code == COMPARE || outer_code == PLUS
8907 || outer_code == MINUS)
8908 *total = 0;
8909 else
8910 *total = COSTS_N_INSNS (1);
8912 else
8913 *total = COSTS_N_INSNS (2);
8914 return true;
8916 case CONST:
8917 case LABEL_REF:
8918 case SYMBOL_REF:
8919 *total = COSTS_N_INSNS (2);
8920 return true;
8922 case CONST_DOUBLE:
8923 *total = COSTS_N_INSNS (4);
8924 return true;
8926 case CONST_VECTOR:
8927 if (TARGET_NEON
8928 && TARGET_HARD_FLOAT
8929 && outer_code == SET
8930 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8931 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8932 *total = COSTS_N_INSNS (1);
8933 else
8934 *total = COSTS_N_INSNS (4);
8935 return true;
8937 case HIGH:
8938 case LO_SUM:
8939 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8940 cost of these slightly. */
8941 *total = COSTS_N_INSNS (1) + 1;
8942 return true;
8944 case SET:
8945 return false;
8947 default:
8948 if (mode != VOIDmode)
8949 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8950 else
8951 *total = COSTS_N_INSNS (4); /* Who knows? */
8952 return false;
8956 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8957 operand, then return the operand that is being shifted. If the shift
8958 is not by a constant, then set SHIFT_REG to point to the operand.
8959 Return NULL if OP is not a shifter operand. */
8960 static rtx
8961 shifter_op_p (rtx op, rtx *shift_reg)
8963 enum rtx_code code = GET_CODE (op);
8965 if (code == MULT && CONST_INT_P (XEXP (op, 1))
8966 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
8967 return XEXP (op, 0);
8968 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
8969 return XEXP (op, 0);
8970 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
8971 || code == ASHIFTRT)
8973 if (!CONST_INT_P (XEXP (op, 1)))
8974 *shift_reg = XEXP (op, 1);
8975 return XEXP (op, 0);
8978 return NULL;
8981 static bool
8982 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
8984 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
8985 gcc_assert (GET_CODE (x) == UNSPEC);
8987 switch (XINT (x, 1))
8989 case UNSPEC_UNALIGNED_LOAD:
8990 /* We can only do unaligned loads into the integer unit, and we can't
8991 use LDM or LDRD. */
8992 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
8993 if (speed_p)
8994 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
8995 + extra_cost->ldst.load_unaligned);
8997 #ifdef NOT_YET
8998 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
8999 ADDR_SPACE_GENERIC, speed_p);
9000 #endif
9001 return true;
9003 case UNSPEC_UNALIGNED_STORE:
9004 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9005 if (speed_p)
9006 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9007 + extra_cost->ldst.store_unaligned);
9009 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9010 #ifdef NOT_YET
9011 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9012 ADDR_SPACE_GENERIC, speed_p);
9013 #endif
9014 return true;
9016 case UNSPEC_VRINTZ:
9017 case UNSPEC_VRINTP:
9018 case UNSPEC_VRINTM:
9019 case UNSPEC_VRINTR:
9020 case UNSPEC_VRINTX:
9021 case UNSPEC_VRINTA:
9022 *cost = COSTS_N_INSNS (1);
9023 if (speed_p)
9024 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9026 return true;
9027 default:
9028 *cost = COSTS_N_INSNS (2);
9029 break;
9031 return false;
9034 /* Cost of a libcall. We assume one insn per argument, an amount for the
9035 call (one insn for -Os) and then one for processing the result. */
9036 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
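/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and to COSTS_N_INSNS (4) at -Os.  */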
9038 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9039 do \
9041 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9042 if (shift_op != NULL \
9043 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9045 if (shift_reg) \
9047 if (speed_p) \
9048 *cost += extra_cost->alu.arith_shift_reg; \
9049 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9051 else if (speed_p) \
9052 *cost += extra_cost->alu.arith_shift; \
9054 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9055 + rtx_cost (XEXP (x, 1 - IDX), \
9056 OP, 1, speed_p)); \
9057 return true; \
9060 while (0);
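/* Typical use, as in the narrow-mode PLUS and MINUS cases below:
   HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) checks whether operand 0 of X is a
   left shift (possibly written as a multiply by a power of two); if so it
   folds the shift into the cost of the arithmetic instruction, adds the
   cost of the remaining operand, and returns from the enclosing cost
   function.  */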
9062 /* RTX costs. Make an estimate of the cost of executing the operation
9063 X, which is contained with an operation with code OUTER_CODE.
9064 SPEED_P indicates whether the cost desired is the performance cost,
9065 or the size cost. The estimate is stored in COST and the return
9066 value is TRUE if the cost calculation is final, or FALSE if the
9067 caller should recurse through the operands of X to add additional
9068 costs.
9070 We currently make no attempt to model the size savings of Thumb-2
9071 16-bit instructions. At the normal points in compilation where
9072 this code is called we have no measure of whether the condition
9073 flags are live or not, and thus no realistic way to determine what
9074 the size will eventually be. */
9075 static bool
9076 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9077 const struct cpu_cost_table *extra_cost,
9078 int *cost, bool speed_p)
9080 enum machine_mode mode = GET_MODE (x);
9082 if (TARGET_THUMB1)
9084 if (speed_p)
9085 *cost = thumb1_rtx_costs (x, code, outer_code);
9086 else
9087 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9088 return true;
9091 switch (code)
9093 case SET:
9094 *cost = 0;
9095 /* SET RTXs don't have a mode so we get it from the destination. */
9096 mode = GET_MODE (SET_DEST (x));
9098 if (REG_P (SET_SRC (x))
9099 && REG_P (SET_DEST (x)))
9101 /* Assume that most copies can be done with a single insn,
9102 unless we don't have HW FP, in which case everything
9103 larger than word mode will require two insns. */
9104 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9105 && GET_MODE_SIZE (mode) > 4)
9106 || mode == DImode)
9107 ? 2 : 1);
9108 /* Conditional register moves can be encoded
9109 in 16 bits in Thumb mode. */
9110 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9111 *cost >>= 1;
9113 return true;
9116 if (CONST_INT_P (SET_SRC (x)))
9118 /* Handle CONST_INT here, since the value doesn't have a mode
9119 and we would otherwise be unable to work out the true cost. */
9120 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9121 outer_code = SET;
9122 /* Slightly lower the cost of setting a core reg to a constant.
9123 This helps break up chains and allows for better scheduling. */
9124 if (REG_P (SET_DEST (x))
9125 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9126 *cost -= 1;
9127 x = SET_SRC (x);
9128 /* Immediate moves with an immediate in the range [0, 255] can be
9129 encoded in 16 bits in Thumb mode. */
9130 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9131 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9132 *cost >>= 1;
9133 goto const_int_cost;
9136 return false;
9138 case MEM:
9139 /* A memory access costs 1 insn if the mode is small, or the address is
9140 a single register, otherwise it costs one insn per word. */
9141 if (REG_P (XEXP (x, 0)))
9142 *cost = COSTS_N_INSNS (1);
9143 else if (flag_pic
9144 && GET_CODE (XEXP (x, 0)) == PLUS
9145 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9146 /* This will be split into two instructions.
9147 See arm.md:calculate_pic_address. */
9148 *cost = COSTS_N_INSNS (2);
9149 else
9150 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9152 /* For speed optimizations, add the costs of the address and
9153 accessing memory. */
9154 if (speed_p)
9155 #ifdef NOT_YET
9156 *cost += (extra_cost->ldst.load
9157 + arm_address_cost (XEXP (x, 0), mode,
9158 ADDR_SPACE_GENERIC, speed_p));
9159 #else
9160 *cost += extra_cost->ldst.load;
9161 #endif
9162 return true;
9164 case PARALLEL:
9166 /* Calculations of LDM costs are complex. We assume an initial cost
9167 (ldm_1st) which will load the number of registers mentioned in
9168 ldm_regs_per_insn_1st registers; then each additional
9169 ldm_regs_per_insn_subsequent registers cost one more insn. The
9170 formula for N regs is thus:
9172 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9173 + ldm_regs_per_insn_subsequent - 1)
9174 / ldm_regs_per_insn_subsequent).
9176 Additional costs may also be added for addressing. A similar
9177 formula is used for STM. */
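/* For instance, with ldm_regs_per_insn_1st == 3 and
   ldm_regs_per_insn_subsequent == 2 (illustrative values only), a
   seven-register LDM is costed as
     ldm_1st + COSTS_N_INSNS ((MAX (7 - 3, 0) + 2 - 1) / 2)
       == ldm_1st + COSTS_N_INSNS (2).  */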
9179 bool is_ldm = load_multiple_operation (x, SImode);
9180 bool is_stm = store_multiple_operation (x, SImode);
9182 *cost = COSTS_N_INSNS (1);
9184 if (is_ldm || is_stm)
9186 if (speed_p)
9188 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9189 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9190 ? extra_cost->ldst.ldm_regs_per_insn_1st
9191 : extra_cost->ldst.stm_regs_per_insn_1st;
9192 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9193 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9194 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9196 *cost += regs_per_insn_1st
9197 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9198 + regs_per_insn_sub - 1)
9199 / regs_per_insn_sub);
9200 return true;
9204 return false;
9206 case DIV:
9207 case UDIV:
9208 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9209 && (mode == SFmode || !TARGET_VFP_SINGLE))
9210 *cost = COSTS_N_INSNS (speed_p
9211 ? extra_cost->fp[mode != SFmode].div : 1);
9212 else if (mode == SImode && TARGET_IDIV)
9213 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9214 else
9215 *cost = LIBCALL_COST (2);
9216 return false; /* All arguments must be in registers. */
9218 case MOD:
9219 case UMOD:
9220 *cost = LIBCALL_COST (2);
9221 return false; /* All arguments must be in registers. */
9223 case ROTATE:
9224 if (mode == SImode && REG_P (XEXP (x, 1)))
9226 *cost = (COSTS_N_INSNS (2)
9227 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9228 if (speed_p)
9229 *cost += extra_cost->alu.shift_reg;
9230 return true;
9232 /* Fall through */
9233 case ROTATERT:
9234 case ASHIFT:
9235 case LSHIFTRT:
9236 case ASHIFTRT:
9237 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9239 *cost = (COSTS_N_INSNS (3)
9240 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9241 if (speed_p)
9242 *cost += 2 * extra_cost->alu.shift;
9243 return true;
9245 else if (mode == SImode)
9247 *cost = (COSTS_N_INSNS (1)
9248 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9249 /* Slightly disparage register shifts at -Os, but not by much. */
9250 if (!CONST_INT_P (XEXP (x, 1)))
9251 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9252 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9253 return true;
9255 else if (GET_MODE_CLASS (mode) == MODE_INT
9256 && GET_MODE_SIZE (mode) < 4)
9258 if (code == ASHIFT)
9260 *cost = (COSTS_N_INSNS (1)
9261 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9262 /* Slightly disparage register shifts at -Os, but not by
9263 much. */
9264 if (!CONST_INT_P (XEXP (x, 1)))
9265 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9266 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9268 else if (code == LSHIFTRT || code == ASHIFTRT)
9270 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9272 /* Can use SBFX/UBFX. */
9273 *cost = COSTS_N_INSNS (1);
9274 if (speed_p)
9275 *cost += extra_cost->alu.bfx;
9276 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9278 else
9280 *cost = COSTS_N_INSNS (2);
9281 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9282 if (speed_p)
9284 if (CONST_INT_P (XEXP (x, 1)))
9285 *cost += 2 * extra_cost->alu.shift;
9286 else
9287 *cost += (extra_cost->alu.shift
9288 + extra_cost->alu.shift_reg);
9290 else
9291 /* Slightly disparage register shifts. */
9292 *cost += !CONST_INT_P (XEXP (x, 1));
9295 else /* Rotates. */
9297 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9298 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9299 if (speed_p)
9301 if (CONST_INT_P (XEXP (x, 1)))
9302 *cost += (2 * extra_cost->alu.shift
9303 + extra_cost->alu.log_shift);
9304 else
9305 *cost += (extra_cost->alu.shift
9306 + extra_cost->alu.shift_reg
9307 + extra_cost->alu.log_shift_reg);
9310 return true;
9313 *cost = LIBCALL_COST (2);
9314 return false;
9316 case MINUS:
9317 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9318 && (mode == SFmode || !TARGET_VFP_SINGLE))
9320 *cost = COSTS_N_INSNS (1);
9321 if (GET_CODE (XEXP (x, 0)) == MULT
9322 || GET_CODE (XEXP (x, 1)) == MULT)
9324 rtx mul_op0, mul_op1, sub_op;
9326 if (speed_p)
9327 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9329 if (GET_CODE (XEXP (x, 0)) == MULT)
9331 mul_op0 = XEXP (XEXP (x, 0), 0);
9332 mul_op1 = XEXP (XEXP (x, 0), 1);
9333 sub_op = XEXP (x, 1);
9335 else
9337 mul_op0 = XEXP (XEXP (x, 1), 0);
9338 mul_op1 = XEXP (XEXP (x, 1), 1);
9339 sub_op = XEXP (x, 0);
9342 /* The first operand of the multiply may be optionally
9343 negated. */
9344 if (GET_CODE (mul_op0) == NEG)
9345 mul_op0 = XEXP (mul_op0, 0);
9347 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9348 + rtx_cost (mul_op1, code, 0, speed_p)
9349 + rtx_cost (sub_op, code, 0, speed_p));
9351 return true;
9354 if (speed_p)
9355 *cost += extra_cost->fp[mode != SFmode].addsub;
9356 return false;
9359 if (mode == SImode)
9361 rtx shift_by_reg = NULL;
9362 rtx shift_op;
9363 rtx non_shift_op;
9365 *cost = COSTS_N_INSNS (1);
9367 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9368 if (shift_op == NULL)
9370 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9371 non_shift_op = XEXP (x, 0);
9373 else
9374 non_shift_op = XEXP (x, 1);
9376 if (shift_op != NULL)
9378 if (shift_by_reg != NULL)
9380 if (speed_p)
9381 *cost += extra_cost->alu.arith_shift_reg;
9382 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9384 else if (speed_p)
9385 *cost += extra_cost->alu.arith_shift;
9387 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9388 + rtx_cost (non_shift_op, code, 0, speed_p));
9389 return true;
9392 if (arm_arch_thumb2
9393 && GET_CODE (XEXP (x, 1)) == MULT)
9395 /* MLS. */
9396 if (speed_p)
9397 *cost += extra_cost->mult[0].add;
9398 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9399 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9400 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9401 return true;
9404 if (CONST_INT_P (XEXP (x, 0)))
9406 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9407 INTVAL (XEXP (x, 0)), NULL_RTX,
9408 NULL_RTX, 1, 0);
9409 *cost = COSTS_N_INSNS (insns);
9410 if (speed_p)
9411 *cost += insns * extra_cost->alu.arith;
9412 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9413 return true;
9416 return false;
9419 if (GET_MODE_CLASS (mode) == MODE_INT
9420 && GET_MODE_SIZE (mode) < 4)
9422 rtx shift_op, shift_reg;
9423 shift_reg = NULL;
9425 /* We check both sides of the MINUS for shifter operands since,
9426 unlike PLUS, it's not commutative. */
9428 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9429 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9431 /* Slightly disparage, as we might need to widen the result. */
9432 *cost = 1 + COSTS_N_INSNS (1);
9433 if (speed_p)
9434 *cost += extra_cost->alu.arith;
9436 if (CONST_INT_P (XEXP (x, 0)))
9438 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9439 return true;
9442 return false;
9445 if (mode == DImode)
9447 *cost = COSTS_N_INSNS (2);
9449 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9451 rtx op1 = XEXP (x, 1);
9453 if (speed_p)
9454 *cost += 2 * extra_cost->alu.arith;
9456 if (GET_CODE (op1) == ZERO_EXTEND)
9457 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9458 else
9459 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9460 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9461 0, speed_p);
9462 return true;
9464 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9466 if (speed_p)
9467 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9468 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9469 0, speed_p)
9470 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9471 return true;
9473 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9474 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9476 if (speed_p)
9477 *cost += (extra_cost->alu.arith
9478 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9479 ? extra_cost->alu.arith
9480 : extra_cost->alu.arith_shift));
9481 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9482 + rtx_cost (XEXP (XEXP (x, 1), 0),
9483 GET_CODE (XEXP (x, 1)), 0, speed_p));
9484 return true;
9487 if (speed_p)
9488 *cost += 2 * extra_cost->alu.arith;
9489 return false;
9492 /* Vector mode? */
9494 *cost = LIBCALL_COST (2);
9495 return false;
9497 case PLUS:
9498 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9499 && (mode == SFmode || !TARGET_VFP_SINGLE))
9501 *cost = COSTS_N_INSNS (1);
9502 if (GET_CODE (XEXP (x, 0)) == MULT)
9504 rtx mul_op0, mul_op1, add_op;
9506 if (speed_p)
9507 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9509 mul_op0 = XEXP (XEXP (x, 0), 0);
9510 mul_op1 = XEXP (XEXP (x, 0), 1);
9511 add_op = XEXP (x, 1);
9513 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9514 + rtx_cost (mul_op1, code, 0, speed_p)
9515 + rtx_cost (add_op, code, 0, speed_p));
9517 return true;
9520 if (speed_p)
9521 *cost += extra_cost->fp[mode != SFmode].addsub;
9522 return false;
9524 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9526 *cost = LIBCALL_COST (2);
9527 return false;
9530 /* Narrow modes can be synthesized in SImode, but the range
9531 of useful sub-operations is limited. Check for shift operations
9532 on one of the operands. Only left shifts can be used in the
9533 narrow modes. */
9534 if (GET_MODE_CLASS (mode) == MODE_INT
9535 && GET_MODE_SIZE (mode) < 4)
9537 rtx shift_op, shift_reg;
9538 shift_reg = NULL;
9540 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9542 if (CONST_INT_P (XEXP (x, 1)))
9544 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9545 INTVAL (XEXP (x, 1)), NULL_RTX,
9546 NULL_RTX, 1, 0);
9547 *cost = COSTS_N_INSNS (insns);
9548 if (speed_p)
9549 *cost += insns * extra_cost->alu.arith;
9550 /* Slightly penalize a narrow operation as the result may
9551 need widening. */
9552 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9553 return true;
9556 /* Slightly penalize a narrow operation as the result may
9557 need widening. */
9558 *cost = 1 + COSTS_N_INSNS (1);
9559 if (speed_p)
9560 *cost += extra_cost->alu.arith;
9562 return false;
9565 if (mode == SImode)
9567 rtx shift_op, shift_reg;
9569 *cost = COSTS_N_INSNS (1);
9570 if (TARGET_INT_SIMD
9571 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9572 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9574 /* UXTA[BH] or SXTA[BH]. */
9575 if (speed_p)
9576 *cost += extra_cost->alu.extnd_arith;
9577 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9578 speed_p)
9579 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9580 return true;
9583 shift_reg = NULL;
9584 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9585 if (shift_op != NULL)
9587 if (shift_reg)
9589 if (speed_p)
9590 *cost += extra_cost->alu.arith_shift_reg;
9591 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9593 else if (speed_p)
9594 *cost += extra_cost->alu.arith_shift;
9596 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9597 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9598 return true;
9600 if (GET_CODE (XEXP (x, 0)) == MULT)
9602 rtx mul_op = XEXP (x, 0);
9604 *cost = COSTS_N_INSNS (1);
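/* The test below accepts each multiply operand either as a sign-extension
   of a narrow value or as an arithmetic right shift by 16 (selecting the
   top halfword), i.e. the operand forms handled by the SMLAxy family.  */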
9606 if (TARGET_DSP_MULTIPLY
9607 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9608 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9609 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9610 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9611 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9612 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9613 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9614 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9615 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9616 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9617 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9618 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9619 == 16))))))
9621 /* SMLA[BT][BT]. */
9622 if (speed_p)
9623 *cost += extra_cost->mult[0].extend_add;
9624 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9625 SIGN_EXTEND, 0, speed_p)
9626 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9627 SIGN_EXTEND, 0, speed_p)
9628 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9629 return true;
9632 if (speed_p)
9633 *cost += extra_cost->mult[0].add;
9634 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9635 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9636 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9637 return true;
9639 if (CONST_INT_P (XEXP (x, 1)))
9641 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9642 INTVAL (XEXP (x, 1)), NULL_RTX,
9643 NULL_RTX, 1, 0);
9644 *cost = COSTS_N_INSNS (insns);
9645 if (speed_p)
9646 *cost += insns * extra_cost->alu.arith;
9647 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9648 return true;
9650 return false;
9653 if (mode == DImode)
9655 if (arm_arch3m
9656 && GET_CODE (XEXP (x, 0)) == MULT
9657 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9658 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9659 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9660 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
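/* UMLAL or SMLAL: a widening multiply-accumulate.  */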
9662 *cost = COSTS_N_INSNS (1);
9663 if (speed_p)
9664 *cost += extra_cost->mult[1].extend_add;
9665 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9666 ZERO_EXTEND, 0, speed_p)
9667 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9668 ZERO_EXTEND, 0, speed_p)
9669 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9670 return true;
9673 *cost = COSTS_N_INSNS (2);
9675 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9676 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9678 if (speed_p)
9679 *cost += (extra_cost->alu.arith
9680 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9681 ? extra_cost->alu.arith
9682 : extra_cost->alu.arith_shift));
9684 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9685 speed_p)
9686 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9687 return true;
9690 if (speed_p)
9691 *cost += 2 * extra_cost->alu.arith;
9692 return false;
9695 /* Vector mode? */
9696 *cost = LIBCALL_COST (2);
9697 return false;
9699 case AND: case XOR: case IOR:
9700 if (mode == SImode)
9702 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9703 rtx op0 = XEXP (x, 0);
9704 rtx shift_op, shift_reg;
9706 *cost = COSTS_N_INSNS (1);
9708 if (subcode == NOT
9709 && (code == AND
9710 || (code == IOR && TARGET_THUMB2)))
9711 op0 = XEXP (op0, 0);
9713 shift_reg = NULL;
9714 shift_op = shifter_op_p (op0, &shift_reg);
9715 if (shift_op != NULL)
9717 if (shift_reg)
9719 if (speed_p)
9720 *cost += extra_cost->alu.log_shift_reg;
9721 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9723 else if (speed_p)
9724 *cost += extra_cost->alu.log_shift;
9726 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9727 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9728 return true;
9731 if (CONST_INT_P (XEXP (x, 1)))
9733 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9734 INTVAL (XEXP (x, 1)), NULL_RTX,
9735 NULL_RTX, 1, 0);
9737 *cost = COSTS_N_INSNS (insns);
9738 if (speed_p)
9739 *cost += insns * extra_cost->alu.logical;
9740 *cost += rtx_cost (op0, code, 0, speed_p);
9741 return true;
9744 if (speed_p)
9745 *cost += extra_cost->alu.logical;
9746 *cost += (rtx_cost (op0, code, 0, speed_p)
9747 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9748 return true;
9751 if (mode == DImode)
9753 rtx op0 = XEXP (x, 0);
9754 enum rtx_code subcode = GET_CODE (op0);
9756 *cost = COSTS_N_INSNS (2);
9758 if (subcode == NOT
9759 && (code == AND
9760 || (code == IOR && TARGET_THUMB2)))
9761 op0 = XEXP (op0, 0);
9763 if (GET_CODE (op0) == ZERO_EXTEND)
9765 if (speed_p)
9766 *cost += 2 * extra_cost->alu.logical;
9768 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9769 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9770 return true;
9772 else if (GET_CODE (op0) == SIGN_EXTEND)
9774 if (speed_p)
9775 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9777 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9778 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9779 return true;
9782 if (speed_p)
9783 *cost += 2 * extra_cost->alu.logical;
9785 return true;
9787 /* Vector mode? */
9789 *cost = LIBCALL_COST (2);
9790 return false;
9792 case MULT:
9793 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9794 && (mode == SFmode || !TARGET_VFP_SINGLE))
9796 rtx op0 = XEXP (x, 0);
9798 *cost = COSTS_N_INSNS (1);
9800 if (GET_CODE (op0) == NEG)
9801 op0 = XEXP (op0, 0);
9803 if (speed_p)
9804 *cost += extra_cost->fp[mode != SFmode].mult;
9806 *cost += (rtx_cost (op0, MULT, 0, speed_p)
9807 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
9808 return true;
9810 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9812 *cost = LIBCALL_COST (2);
9813 return false;
9816 if (mode == SImode)
9818 *cost = COSTS_N_INSNS (1);
9819 if (TARGET_DSP_MULTIPLY
9820 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9821 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9822 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9823 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9824 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9825 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9826 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9827 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9828 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9829 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9830 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9831 && (INTVAL (XEXP (XEXP (x, 1), 1))
9832 == 16))))))
9834 /* SMUL[TB][TB]. */
9835 if (speed_p)
9836 *cost += extra_cost->mult[0].extend;
9837 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
9838 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
9839 return true;
9841 if (speed_p)
9842 *cost += extra_cost->mult[0].simple;
9843 return false;
9846 if (mode == DImode)
9848 if (arm_arch3m
9849 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9850 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9851 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9852 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
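/* UMULL or SMULL: a widening multiply.  */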
9854 *cost = COSTS_N_INSNS (1);
9855 if (speed_p)
9856 *cost += extra_cost->mult[1].extend;
9857 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
9858 ZERO_EXTEND, 0, speed_p)
9859 + rtx_cost (XEXP (XEXP (x, 1), 0),
9860 ZERO_EXTEND, 0, speed_p));
9861 return true;
9864 *cost = LIBCALL_COST (2);
9865 return false;
9868 /* Vector mode? */
9869 *cost = LIBCALL_COST (2);
9870 return false;
9872 case NEG:
9873 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9874 && (mode == SFmode || !TARGET_VFP_SINGLE))
9876 *cost = COSTS_N_INSNS (1);
9877 if (speed_p)
9878 *cost += extra_cost->fp[mode != SFmode].neg;
9880 return false;
9882 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9884 *cost = LIBCALL_COST (1);
9885 return false;
9888 if (mode == SImode)
9890 if (GET_CODE (XEXP (x, 0)) == ABS)
9892 *cost = COSTS_N_INSNS (2);
9893 /* Assume the non-flag-changing variant. */
9894 if (speed_p)
9895 *cost += (extra_cost->alu.log_shift
9896 + extra_cost->alu.arith_shift);
9897 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
9898 return true;
9901 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
9902 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
9904 *cost = COSTS_N_INSNS (2);
9905 /* No extra cost for MOV imm and MVN imm. */
9906 /* If the comparison op is using the flags, there's no further
9907 cost, otherwise we need to add the cost of the comparison. */
9908 if (!(REG_P (XEXP (XEXP (x, 0), 0))
9909 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
9910 && XEXP (XEXP (x, 0), 1) == const0_rtx))
9912 *cost += (COSTS_N_INSNS (1)
9913 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
9914 speed_p)
9915 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
9916 speed_p));
9917 if (speed_p)
9918 *cost += extra_cost->alu.arith;
9920 return true;
9922 *cost = COSTS_N_INSNS (1);
9923 if (speed_p)
9924 *cost += extra_cost->alu.arith;
9925 return false;
9928 if (GET_MODE_CLASS (mode) == MODE_INT
9929 && GET_MODE_SIZE (mode) < 4)
9931 /* Slightly disparage, as we might need an extend operation. */
9932 *cost = 1 + COSTS_N_INSNS (1);
9933 if (speed_p)
9934 *cost += extra_cost->alu.arith;
9935 return false;
9938 if (mode == DImode)
9940 *cost = COSTS_N_INSNS (2);
9941 if (speed_p)
9942 *cost += 2 * extra_cost->alu.arith;
9943 return false;
9946 /* Vector mode? */
9947 *cost = LIBCALL_COST (1);
9948 return false;
9950 case NOT:
9951 if (mode == SImode)
9953 rtx shift_op;
9954 rtx shift_reg = NULL;
9956 *cost = COSTS_N_INSNS (1);
9957 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9959 if (shift_op)
9961 if (shift_reg != NULL)
9963 if (speed_p)
9964 *cost += extra_cost->alu.log_shift_reg;
9965 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9967 else if (speed_p)
9968 *cost += extra_cost->alu.log_shift;
9969 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
9970 return true;
9973 if (speed_p)
9974 *cost += extra_cost->alu.logical;
9975 return false;
9977 if (mode == DImode)
9979 *cost = COSTS_N_INSNS (2);
9980 return false;
9983 /* Vector mode? */
9985 *cost += LIBCALL_COST (1);
9986 return false;
9988 case IF_THEN_ELSE:
9990 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9992 *cost = COSTS_N_INSNS (4);
9993 return true;
9995 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
9996 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
9998 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
9999 /* Assume that if one arm of the if_then_else is a register,
10000 that it will be tied with the result and eliminate the
10001 conditional insn. */
10002 if (REG_P (XEXP (x, 1)))
10003 *cost += op2cost;
10004 else if (REG_P (XEXP (x, 2)))
10005 *cost += op1cost;
10006 else
10008 if (speed_p)
10010 if (extra_cost->alu.non_exec_costs_exec)
10011 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10012 else
10013 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10015 else
10016 *cost += op1cost + op2cost;
10019 return true;
10021 case COMPARE:
10022 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10023 *cost = 0;
10024 else
10026 enum machine_mode op0mode;
10027 /* We'll mostly assume that the cost of a compare is the cost of the
10028 LHS. However, there are some notable exceptions. */
10030 /* Floating point compares are never done as side-effects. */
10031 op0mode = GET_MODE (XEXP (x, 0));
10032 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10033 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10035 *cost = COSTS_N_INSNS (1);
10036 if (speed_p)
10037 *cost += extra_cost->fp[op0mode != SFmode].compare;
10039 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10041 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10042 return true;
10045 return false;
10047 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10049 *cost = LIBCALL_COST (2);
10050 return false;
10053 /* DImode compares normally take two insns. */
10054 if (op0mode == DImode)
10056 *cost = COSTS_N_INSNS (2);
10057 if (speed_p)
10058 *cost += 2 * extra_cost->alu.arith;
10059 return false;
10062 if (op0mode == SImode)
10064 rtx shift_op;
10065 rtx shift_reg;
10067 if (XEXP (x, 1) == const0_rtx
10068 && !(REG_P (XEXP (x, 0))
10069 || (GET_CODE (XEXP (x, 0)) == SUBREG
10070 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10072 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10074 /* Multiply operations that set the flags are often
10075 significantly more expensive. */
10076 if (speed_p
10077 && GET_CODE (XEXP (x, 0)) == MULT
10078 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10079 *cost += extra_cost->mult[0].flag_setting;
10081 if (speed_p
10082 && GET_CODE (XEXP (x, 0)) == PLUS
10083 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10084 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10085 0), 1), mode))
10086 *cost += extra_cost->mult[0].flag_setting;
10087 return true;
10090 shift_reg = NULL;
10091 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10092 if (shift_op != NULL)
10094 *cost = COSTS_N_INSNS (1);
10095 if (shift_reg != NULL)
10097 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10098 if (speed_p)
10099 *cost += extra_cost->alu.arith_shift_reg;
10101 else if (speed_p)
10102 *cost += extra_cost->alu.arith_shift;
10103 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10104 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10105 return true;
10108 *cost = COSTS_N_INSNS (1);
10109 if (speed_p)
10110 *cost += extra_cost->alu.arith;
10111 if (CONST_INT_P (XEXP (x, 1))
10112 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10114 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10115 return true;
10117 return false;
10120 /* Vector mode? */
10122 *cost = LIBCALL_COST (2);
10123 return false;
10125 return true;
10127 case EQ:
10128 case NE:
10129 case LT:
10130 case LE:
10131 case GT:
10132 case GE:
10133 case LTU:
10134 case LEU:
10135 case GEU:
10136 case GTU:
10137 case ORDERED:
10138 case UNORDERED:
10139 case UNEQ:
10140 case UNLE:
10141 case UNLT:
10142 case UNGE:
10143 case UNGT:
10144 case LTGT:
10145 if (outer_code == SET)
10147 /* Is it a store-flag operation? */
10148 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10149 && XEXP (x, 1) == const0_rtx)
10151 /* Thumb also needs an IT insn. */
10152 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10153 return true;
10155 if (XEXP (x, 1) == const0_rtx)
10157 switch (code)
10159 case LT:
10160 /* LSR Rd, Rn, #31. */
10161 *cost = COSTS_N_INSNS (1);
10162 if (speed_p)
10163 *cost += extra_cost->alu.shift;
10164 break;
10166 case EQ:
10167 /* RSBS T1, Rn, #0
10168 ADC Rd, Rn, T1. */
10170 case NE:
10171 /* SUBS T1, Rn, #1
10172 SBC Rd, Rn, T1. */
10173 *cost = COSTS_N_INSNS (2);
10174 break;
10176 case LE:
10177 /* RSBS T1, Rn, Rn, LSR #31
10178 ADC Rd, Rn, T1. */
10179 *cost = COSTS_N_INSNS (2);
10180 if (speed_p)
10181 *cost += extra_cost->alu.arith_shift;
10182 break;
10184 case GT:
10185 /* RSB Rd, Rn, Rn, ASR #1
10186 LSR Rd, Rd, #31. */
10187 *cost = COSTS_N_INSNS (2);
10188 if (speed_p)
10189 *cost += (extra_cost->alu.arith_shift
10190 + extra_cost->alu.shift);
10191 break;
10193 case GE:
10194 /* ASR Rd, Rn, #31
10195 ADD Rd, Rn, #1. */
10196 *cost = COSTS_N_INSNS (2);
10197 if (speed_p)
10198 *cost += extra_cost->alu.shift;
10199 break;
10201 default:
10202 /* Remaining cases are either meaningless or would take
10203 three insns anyway. */
10204 *cost = COSTS_N_INSNS (3);
10205 break;
10207 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10208 return true;
10210 else
10212 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10213 if (CONST_INT_P (XEXP (x, 1))
10214 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10216 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10217 return true;
10220 return false;
10223 /* Not directly inside a set. If it involves the condition code
10224 register it must be the condition for a branch, cond_exec or
10225 I_T_E operation. Since the comparison is performed elsewhere
10226 this is just the control part which has no additional
10227 cost. */
10228 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10229 && XEXP (x, 1) == const0_rtx)
10231 *cost = 0;
10232 return true;
10234 return false;
10236 case ABS:
10237 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10238 && (mode == SFmode || !TARGET_VFP_SINGLE))
10240 *cost = COSTS_N_INSNS (1);
10241 if (speed_p)
10242 *cost += extra_cost->fp[mode != SFmode].neg;
10244 return false;
10246 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10248 *cost = LIBCALL_COST (1);
10249 return false;
10252 if (mode == SImode)
10254 *cost = COSTS_N_INSNS (1);
10255 if (speed_p)
10256 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10257 return false;
10259 /* Vector mode? */
10260 *cost = LIBCALL_COST (1);
10261 return false;
10263 case SIGN_EXTEND:
10264 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10265 && MEM_P (XEXP (x, 0)))
10267 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10269 if (mode == DImode)
10270 *cost += COSTS_N_INSNS (1);
10272 if (!speed_p)
10273 return true;
10275 if (GET_MODE (XEXP (x, 0)) == SImode)
10276 *cost += extra_cost->ldst.load;
10277 else
10278 *cost += extra_cost->ldst.load_sign_extend;
10280 if (mode == DImode)
10281 *cost += extra_cost->alu.shift;
10283 return true;
10286 /* Widening from less than 32-bits requires an extend operation. */
10287 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10289 /* We have SXTB/SXTH. */
10290 *cost = COSTS_N_INSNS (1);
10291 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10292 if (speed_p)
10293 *cost += extra_cost->alu.extnd;
10295 else if (GET_MODE (XEXP (x, 0)) != SImode)
10297 /* Needs two shifts. */
10298 *cost = COSTS_N_INSNS (2);
10299 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10300 if (speed_p)
10301 *cost += 2 * extra_cost->alu.shift;
10304 /* Widening beyond 32-bits requires one more insn. */
10305 if (mode == DImode)
10307 *cost += COSTS_N_INSNS (1);
10308 if (speed_p)
10309 *cost += extra_cost->alu.shift;
10312 return true;
10314 case ZERO_EXTEND:
10315 if ((arm_arch4
10316 || GET_MODE (XEXP (x, 0)) == SImode
10317 || GET_MODE (XEXP (x, 0)) == QImode)
10318 && MEM_P (XEXP (x, 0)))
10320 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10322 if (mode == DImode)
10323 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10325 return true;
10328 /* Widening from less than 32-bits requires an extend operation. */
10329 if (GET_MODE (XEXP (x, 0)) == QImode)
10331 /* UXTB can be a shorter instruction in Thumb2, but it might
10332 be slower than the AND Rd, Rn, #255 alternative. When
10333 optimizing for speed it should never be slower to use
10334 AND, and we don't really model 16-bit vs 32-bit insns
10335 here. */
10336 *cost = COSTS_N_INSNS (1);
10337 if (speed_p)
10338 *cost += extra_cost->alu.logical;
10340 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10342 /* We have UXTB/UXTH. */
10343 *cost = COSTS_N_INSNS (1);
10344 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10345 if (speed_p)
10346 *cost += extra_cost->alu.extnd;
10348 else if (GET_MODE (XEXP (x, 0)) != SImode)
10350 /* Needs two shifts. It's marginally preferable to use
10351 shifts rather than two BIC instructions as the second
10352 shift may merge with a subsequent insn as a shifter
10353 op. */
10354 *cost = COSTS_N_INSNS (2);
10355 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10356 if (speed_p)
10357 *cost += 2 * extra_cost->alu.shift;
10359 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10360 *cost = COSTS_N_INSNS (1);
10362 /* Widening beyond 32-bits requires one more insn. */
10363 if (mode == DImode)
10365 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10368 return true;
10370 case CONST_INT:
10371 *cost = 0;
10372 /* CONST_INT has no mode, so we cannot tell for sure how many
10373 insns are really going to be needed. The best we can do is
10374 look at the value passed. If it fits in SImode, then assume
10375 that's the mode it will be used for. Otherwise assume it
10376 will be used in DImode. */
10377 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10378 mode = SImode;
10379 else
10380 mode = DImode;
10382 /* Avoid blowing up in arm_gen_constant (). */
10383 if (!(outer_code == PLUS
10384 || outer_code == AND
10385 || outer_code == IOR
10386 || outer_code == XOR
10387 || outer_code == MINUS))
10388 outer_code = SET;
10390 const_int_cost:
10391 if (mode == SImode)
10393 *cost += 0;
10394 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10395 INTVAL (x), NULL, NULL,
10396 0, 0));
10397 /* Extra costs? */
10399 else
10401 *cost += COSTS_N_INSNS (arm_gen_constant
10402 (outer_code, SImode, NULL,
10403 trunc_int_for_mode (INTVAL (x), SImode),
10404 NULL, NULL, 0, 0)
10405 + arm_gen_constant (outer_code, SImode, NULL,
10406 INTVAL (x) >> 32, NULL,
10407 NULL, 0, 0));
10408 /* Extra costs? */
10411 return true;
10413 case CONST:
10414 case LABEL_REF:
10415 case SYMBOL_REF:
10416 if (speed_p)
10418 if (arm_arch_thumb2 && !flag_pic)
10419 *cost = COSTS_N_INSNS (2);
10420 else
10421 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10423 else
10424 *cost = COSTS_N_INSNS (2);
10426 if (flag_pic)
10428 *cost += COSTS_N_INSNS (1);
10429 if (speed_p)
10430 *cost += extra_cost->alu.arith;
10433 return true;
10435 case CONST_FIXED:
10436 *cost = COSTS_N_INSNS (4);
10437 /* Fixme. */
10438 return true;
10440 case CONST_DOUBLE:
10441 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10442 && (mode == SFmode || !TARGET_VFP_SINGLE))
10444 if (vfp3_const_double_rtx (x))
10446 *cost = COSTS_N_INSNS (1);
10447 if (speed_p)
10448 *cost += extra_cost->fp[mode == DFmode].fpconst;
10449 return true;
10452 if (speed_p)
10454 *cost = COSTS_N_INSNS (1);
10455 if (mode == DFmode)
10456 *cost += extra_cost->ldst.loadd;
10457 else
10458 *cost += extra_cost->ldst.loadf;
10460 else
10461 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10463 return true;
10465 *cost = COSTS_N_INSNS (4);
10466 return true;
10468 case CONST_VECTOR:
10469 /* Fixme. */
10470 if (TARGET_NEON
10471 && TARGET_HARD_FLOAT
10472 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10473 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10474 *cost = COSTS_N_INSNS (1);
10475 else
10476 *cost = COSTS_N_INSNS (4);
10477 return true;
10479 case HIGH:
10480 case LO_SUM:
10481 *cost = COSTS_N_INSNS (1);
10482 /* When optimizing for size, we prefer constant pool entries to
10483 MOVW/MOVT pairs, so bump the cost of these slightly. */
10484 if (!speed_p)
10485 *cost += 1;
10486 return true;
10488 case CLZ:
10489 *cost = COSTS_N_INSNS (1);
10490 if (speed_p)
10491 *cost += extra_cost->alu.clz;
10492 return false;
10494 case SMIN:
10495 if (XEXP (x, 1) == const0_rtx)
10497 *cost = COSTS_N_INSNS (1);
10498 if (speed_p)
10499 *cost += extra_cost->alu.log_shift;
10500 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10501 return true;
10503 /* Fall through. */
10504 case SMAX:
10505 case UMIN:
10506 case UMAX:
10507 *cost = COSTS_N_INSNS (2);
10508 return false;
10510 case TRUNCATE:
10511 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10512 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10513 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10514 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10515 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10516 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10517 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10518 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10519 == ZERO_EXTEND))))
10521 *cost = COSTS_N_INSNS (1);
10522 if (speed_p)
10523 *cost += extra_cost->mult[1].extend;
10524 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10525 speed_p)
10526 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10527 0, speed_p));
10528 return true;
10530 *cost = LIBCALL_COST (1);
10531 return false;
10533 case UNSPEC:
10534 return arm_unspec_cost (x, outer_code, speed_p, cost);
10536 case PC:
10537 /* Reading the PC is like reading any other register. Writing it
10538 is more expensive, but we take that into account elsewhere. */
10539 *cost = 0;
10540 return true;
10542 case ZERO_EXTRACT:
10543 /* TODO: Simple zero_extract of bottom bits using AND. */
10544 /* Fall through. */
10545 case SIGN_EXTRACT:
10546 if (arm_arch6
10547 && mode == SImode
10548 && CONST_INT_P (XEXP (x, 1))
10549 && CONST_INT_P (XEXP (x, 2)))
10551 *cost = COSTS_N_INSNS (1);
10552 if (speed_p)
10553 *cost += extra_cost->alu.bfx;
10554 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10555 return true;
10557 /* Without UBFX/SBFX, need to resort to shift operations. */
10558 *cost = COSTS_N_INSNS (2);
10559 if (speed_p)
10560 *cost += 2 * extra_cost->alu.shift;
10561 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10562 return true;
10564 case FLOAT_EXTEND:
10565 if (TARGET_HARD_FLOAT)
10567 *cost = COSTS_N_INSNS (1);
10568 if (speed_p)
10569 *cost += extra_cost->fp[mode == DFmode].widen;
10570 if (!TARGET_FPU_ARMV8
10571 && GET_MODE (XEXP (x, 0)) == HFmode)
10573 /* Pre v8, widening HF->DF is a two-step process, first
10574 widening to SFmode. */
10575 *cost += COSTS_N_INSNS (1);
10576 if (speed_p)
10577 *cost += extra_cost->fp[0].widen;
10579 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10580 return true;
10583 *cost = LIBCALL_COST (1);
10584 return false;
10586 case FLOAT_TRUNCATE:
10587 if (TARGET_HARD_FLOAT)
10589 *cost = COSTS_N_INSNS (1);
10590 if (speed_p)
10591 *cost += extra_cost->fp[mode == DFmode].narrow;
10592 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10593 return true;
10594 /* Vector modes? */
10596 *cost = LIBCALL_COST (1);
10597 return false;
10599 case FIX:
10600 case UNSIGNED_FIX:
10601 if (TARGET_HARD_FLOAT)
10603 if (GET_MODE_CLASS (mode) == MODE_INT)
10605 *cost = COSTS_N_INSNS (1);
10606 if (speed_p)
10607 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10609 /* Strip off the 'cost' of rounding towards zero. */
10609 if (GET_CODE (XEXP (x, 0)) == FIX)
10610 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10611 else
10612 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10613 /* ??? Increase the cost to deal with transferring from
10614 FP -> CORE registers? */
10615 return true;
10617 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10618 && TARGET_FPU_ARMV8)
10620 *cost = COSTS_N_INSNS (1);
10621 if (speed_p)
10622 *cost += extra_cost->fp[mode == DFmode].roundint;
10623 return false;
10625 /* Vector costs? */
10627 *cost = LIBCALL_COST (1);
10628 return false;
10630 case FLOAT:
10631 case UNSIGNED_FLOAT:
10632 if (TARGET_HARD_FLOAT)
10634 /* ??? Increase the cost to deal with transferring from CORE
10635 -> FP registers? */
10636 *cost = COSTS_N_INSNS (1);
10637 if (speed_p)
10638 *cost += extra_cost->fp[mode == DFmode].fromint;
10639 return false;
10641 *cost = LIBCALL_COST (1);
10642 return false;
10644 case CALL:
10645 *cost = COSTS_N_INSNS (1);
10646 return true;
10648 case ASM_OPERANDS:
10649 /* Just a guess. Cost one insn per input. */
10650 *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));
10651 return true;
10653 default:
10654 if (mode != VOIDmode)
10655 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10656 else
10657 *cost = COSTS_N_INSNS (4); /* Who knows? */
10658 return false;
10662 #undef HANDLE_NARROW_SHIFT_ARITH
10664 /* Implement TARGET_RTX_COSTS: dispatch to the per-core cost functions or the
      new table-driven costs, for both the speed and the size cases. */
10665 static bool
10666 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10667 int *total, bool speed)
10669 bool result;
10671 if (TARGET_OLD_RTX_COSTS
10672 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10674 /* Old way. (Deprecated.) */
10675 if (!speed)
10676 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10677 (enum rtx_code) outer_code, total);
10678 else
10679 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10680 (enum rtx_code) outer_code, total,
10681 speed);
10683 else
10685 /* New way. */
10686 if (current_tune->insn_extra_cost)
10687 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10688 (enum rtx_code) outer_code,
10689 current_tune->insn_extra_cost,
10690 total, speed);
10691 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10692 && current_tune->insn_extra_cost == NULL */
10693 else
10694 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10695 (enum rtx_code) outer_code,
10696 &generic_extra_costs, total, speed);
10699 if (dump_file && (dump_flags & TDF_DETAILS))
10701 print_rtl_single (dump_file, x);
10702 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10703 *total, result ? "final" : "partial");
10705 return result;
10708 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10709 supported on any "slowmul" cores, so it can be ignored. */
10711 static bool
10712 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10713 int *total, bool speed)
10715 enum machine_mode mode = GET_MODE (x);
10717 if (TARGET_THUMB)
10719 *total = thumb1_rtx_costs (x, code, outer_code);
10720 return true;
10723 switch (code)
10725 case MULT:
10726 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10727 || mode == DImode)
10729 *total = COSTS_N_INSNS (20);
10730 return false;
10733 if (CONST_INT_P (XEXP (x, 1)))
10735 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10736 & (unsigned HOST_WIDE_INT) 0xffffffff);
10737 int cost, const_ok = const_ok_for_arm (i);
10738 int j, booth_unit_size;
10740 /* Tune as appropriate. */
10741 cost = const_ok ? 4 : 8;
10742 booth_unit_size = 2;
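/* Each iteration of the loop below models one Booth step of booth_unit_size
   bits, so the estimated cost grows with the position of the most
   significant set bit of the constant operand.  */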
10743 for (j = 0; i && j < 32; j += booth_unit_size)
10745 i >>= booth_unit_size;
10746 cost++;
10749 *total = COSTS_N_INSNS (cost);
10750 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
10751 return true;
10754 *total = COSTS_N_INSNS (20);
10755 return false;
10757 default:
10758 return arm_rtx_costs_1 (x, outer_code, total, speed);
10763 /* RTX cost for cores with a fast multiply unit (M variants). */
10765 static bool
10766 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10767 int *total, bool speed)
10769 enum machine_mode mode = GET_MODE (x);
10771 if (TARGET_THUMB1)
10773 *total = thumb1_rtx_costs (x, code, outer_code);
10774 return true;
10777 /* ??? should thumb2 use different costs? */
10778 switch (code)
10780 case MULT:
10781 /* There is no point basing this on the tuning, since it is always the
10782 fast variant if it exists at all. */
10783 if (mode == DImode
10784 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10785 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10786 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10788 *total = COSTS_N_INSNS (2);
10789 return false;
10793 if (mode == DImode)
10795 *total = COSTS_N_INSNS (5);
10796 return false;
10799 if (CONST_INT_P (XEXP (x, 1)))
10801 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10802 & (unsigned HOST_WIDE_INT) 0xffffffff);
10803 int cost, const_ok = const_ok_for_arm (i);
10804 int j, booth_unit_size;
10806 /* Tune as appropriate. */
10807 cost = const_ok ? 4 : 8;
10808 booth_unit_size = 8;
10809 for (j = 0; i && j < 32; j += booth_unit_size)
10811 i >>= booth_unit_size;
10812 cost++;
10815 *total = COSTS_N_INSNS (cost);
10816 return false;
10819 if (mode == SImode)
10821 *total = COSTS_N_INSNS (4);
10822 return false;
10825 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10827 if (TARGET_HARD_FLOAT
10828 && (mode == SFmode
10829 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10831 *total = COSTS_N_INSNS (1);
10832 return false;
10836 /* Requires a lib call */
10837 *total = COSTS_N_INSNS (20);
10838 return false;
10840 default:
10841 return arm_rtx_costs_1 (x, outer_code, total, speed);
10846 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
10847 so it can be ignored. */
10849 static bool
10850 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10851 int *total, bool speed)
10853 enum machine_mode mode = GET_MODE (x);
10855 if (TARGET_THUMB)
10857 *total = thumb1_rtx_costs (x, code, outer_code);
10858 return true;
10861 switch (code)
10863 case COMPARE:
10864 if (GET_CODE (XEXP (x, 0)) != MULT)
10865 return arm_rtx_costs_1 (x, outer_code, total, speed);
10867 /* A COMPARE of a MULT is slow on XScale; the muls instruction
10868 will stall until the multiplication is complete. */
10869 *total = COSTS_N_INSNS (3);
10870 return false;
10872 case MULT:
10873 /* There is no point basing this on the tuning, since it is always the
10874 fast variant if it exists at all. */
10875 if (mode == DImode
10876 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10877 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10878 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10880 *total = COSTS_N_INSNS (2);
10881 return false;
10885 if (mode == DImode)
10887 *total = COSTS_N_INSNS (5);
10888 return false;
10891 if (CONST_INT_P (XEXP (x, 1)))
10893 /* If operand 1 is a constant we can more accurately
10894 calculate the cost of the multiply. The multiplier can
10895 retire 15 bits on the first cycle and a further 12 on the
10896 second. We do, of course, have to load the constant into
10897 a register first. */
10898 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
10899 /* There's a general overhead of one cycle. */
10900 int cost = 1;
10901 unsigned HOST_WIDE_INT masked_const;
10903 if (i & 0x80000000)
10904 i = ~i;
10906 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
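/* The two masks below correspond to the retirement model described above:
   any bits above the low 15 require a second cycle, and any bits above the
   low 27 (15 + 12) require a third.  */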
10908 masked_const = i & 0xffff8000;
10909 if (masked_const != 0)
10911 cost++;
10912 masked_const = i & 0xf8000000;
10913 if (masked_const != 0)
10914 cost++;
10916 *total = COSTS_N_INSNS (cost);
10917 return false;
10920 if (mode == SImode)
10922 *total = COSTS_N_INSNS (3);
10923 return false;
10926 /* Requires a lib call */
10927 *total = COSTS_N_INSNS (20);
10928 return false;
10930 default:
10931 return arm_rtx_costs_1 (x, outer_code, total, speed);
10936 /* RTX costs for 9e (and later) cores. */
10938 static bool
10939 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10940 int *total, bool speed)
10942 enum machine_mode mode = GET_MODE (x);
10944 if (TARGET_THUMB1)
10946 switch (code)
10948 case MULT:
10949 *total = COSTS_N_INSNS (3);
10950 return true;
10952 default:
10953 *total = thumb1_rtx_costs (x, code, outer_code);
10954 return true;
10958 switch (code)
10960 case MULT:
10961 /* There is no point basing this on the tuning, since it is always the
10962 fast variant if it exists at all. */
10963 if (mode == DImode
10964 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10965 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10966 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10968 *total = COSTS_N_INSNS (2);
10969 return false;
10973 if (mode == DImode)
10975 *total = COSTS_N_INSNS (5);
10976 return false;
10979 if (mode == SImode)
10981 *total = COSTS_N_INSNS (2);
10982 return false;
10985 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10987 if (TARGET_HARD_FLOAT
10988 && (mode == SFmode
10989 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10991 *total = COSTS_N_INSNS (1);
10992 return false;
10996 *total = COSTS_N_INSNS (20);
10997 return false;
10999 default:
11000 return arm_rtx_costs_1 (x, outer_code, total, speed);
11003 /* All address computations that can be done are free, but rtx cost returns
11004 the same for practically all of them. So we weight the different types
11005 of address here in the order (most pref first):
11006 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
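/* For example, (post_inc r0) costs 0, (plus r0 (const_int 4)) costs 2,
   (plus r0 (mult r1 (const_int 4))) costs 3, (plus r0 r1) costs 4, a plain
   register costs 6, and a symbol or further memory reference costs 10.  */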
11007 static inline int
11008 arm_arm_address_cost (rtx x)
11010 enum rtx_code c = GET_CODE (x);
11012 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11013 return 0;
11014 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11015 return 10;
11017 if (c == PLUS)
11019 if (CONST_INT_P (XEXP (x, 1)))
11020 return 2;
11022 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11023 return 3;
11025 return 4;
11028 return 6;
11031 static inline int
11032 arm_thumb_address_cost (rtx x)
11034 enum rtx_code c = GET_CODE (x);
11036 if (c == REG)
11037 return 1;
11038 if (c == PLUS
11039 && REG_P (XEXP (x, 0))
11040 && CONST_INT_P (XEXP (x, 1)))
11041 return 1;
11043 return 2;
11046 static int
11047 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11048 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11050 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11053 /* Adjust cost hook for XScale. */
11054 static bool
11055 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11057 /* Some true dependencies can have a higher cost depending
11058 on precisely how certain input operands are used. */
11059 if (REG_NOTE_KIND(link) == 0
11060 && recog_memoized (insn) >= 0
11061 && recog_memoized (dep) >= 0)
11063 int shift_opnum = get_attr_shift (insn);
11064 enum attr_type attr_type = get_attr_type (dep);
11066 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11067 operand for INSN. If we have a shifted input operand and the
11068 instruction we depend on is another ALU instruction, then we may
11069 have to account for an additional stall. */
11070 if (shift_opnum != 0
11071 && (attr_type == TYPE_ALU_SHIFT_IMM
11072 || attr_type == TYPE_ALUS_SHIFT_IMM
11073 || attr_type == TYPE_LOGIC_SHIFT_IMM
11074 || attr_type == TYPE_LOGICS_SHIFT_IMM
11075 || attr_type == TYPE_ALU_SHIFT_REG
11076 || attr_type == TYPE_ALUS_SHIFT_REG
11077 || attr_type == TYPE_LOGIC_SHIFT_REG
11078 || attr_type == TYPE_LOGICS_SHIFT_REG
11079 || attr_type == TYPE_MOV_SHIFT
11080 || attr_type == TYPE_MVN_SHIFT
11081 || attr_type == TYPE_MOV_SHIFT_REG
11082 || attr_type == TYPE_MVN_SHIFT_REG))
11084 rtx shifted_operand;
11085 int opno;
11087 /* Get the shifted operand. */
11088 extract_insn (insn);
11089 shifted_operand = recog_data.operand[shift_opnum];
11091 /* Iterate over all the operands in DEP. If we write an operand
11092 that overlaps with SHIFTED_OPERAND, then we have to increase the
11093 cost of this dependency. */
11094 extract_insn (dep);
11095 preprocess_constraints ();
11096 for (opno = 0; opno < recog_data.n_operands; opno++)
11098 /* We can ignore strict inputs. */
11099 if (recog_data.operand_type[opno] == OP_IN)
11100 continue;
11102 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11103 shifted_operand))
11105 *cost = 2;
11106 return false;
11111 return true;
11114 /* Adjust cost hook for Cortex A9. */
11115 static bool
11116 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11118 switch (REG_NOTE_KIND (link))
11120 case REG_DEP_ANTI:
11121 *cost = 0;
11122 return false;
11124 case REG_DEP_TRUE:
11125 case REG_DEP_OUTPUT:
11126 if (recog_memoized (insn) >= 0
11127 && recog_memoized (dep) >= 0)
11129 if (GET_CODE (PATTERN (insn)) == SET)
11131 if (GET_MODE_CLASS
11132 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11133 || GET_MODE_CLASS
11134 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11136 enum attr_type attr_type_insn = get_attr_type (insn);
11137 enum attr_type attr_type_dep = get_attr_type (dep);
11139 /* By default all dependencies of the form
11140 s0 = s0 <op> s1
11141 s0 = s0 <op> s2
11142 have an extra latency of 1 cycle because
11143 of the input and output dependency in this
11144 case. However, this gets modeled as a true
11145 dependency, hence all these checks. */
11146 if (REG_P (SET_DEST (PATTERN (insn)))
11147 && REG_P (SET_DEST (PATTERN (dep)))
11148 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11149 SET_DEST (PATTERN (dep))))
11151 /* FMACS is a special case where the dependent
11152 instruction can be issued 3 cycles before
11153 the normal latency in case of an output
11154 dependency. */
11155 if ((attr_type_insn == TYPE_FMACS
11156 || attr_type_insn == TYPE_FMACD)
11157 && (attr_type_dep == TYPE_FMACS
11158 || attr_type_dep == TYPE_FMACD))
11160 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11161 *cost = insn_default_latency (dep) - 3;
11162 else
11163 *cost = insn_default_latency (dep);
11164 return false;
11166 else
11168 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11169 *cost = insn_default_latency (dep) + 1;
11170 else
11171 *cost = insn_default_latency (dep);
11173 return false;
11178 break;
11180 default:
11181 gcc_unreachable ();
11184 return true;
11187 /* Adjust cost hook for FA726TE. */
11188 static bool
11189 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11191 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11192 followed by a predicated one) has a penalty of 3. */
11193 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11194 && recog_memoized (insn) >= 0
11195 && recog_memoized (dep) >= 0
11196 && get_attr_conds (dep) == CONDS_SET)
11198 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11199 if (get_attr_conds (insn) == CONDS_USE
11200 && get_attr_type (insn) != TYPE_BRANCH)
11202 *cost = 3;
11203 return false;
11206 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11207 || get_attr_conds (insn) == CONDS_USE)
11209 *cost = 0;
11210 return false;
11214 return true;
11217 /* Implement TARGET_REGISTER_MOVE_COST.
11219 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11220 it is typically more expensive than a single memory access. We set
11221 the cost to less than two memory accesses so that floating
11222 point to integer conversion does not go through memory. */
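/* For example, with the TARGET_32BIT memory move cost of 10 (see
   arm_memory_move_cost below), the value 15 used here keeps a VFP<->core
   transfer cheaper than a store/reload round trip costed at 20.  */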
11225 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11226 reg_class_t from, reg_class_t to)
11228 if (TARGET_32BIT)
11230 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11231 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11232 return 15;
11233 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11234 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11235 return 4;
11236 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11237 return 20;
11238 else
11239 return 2;
11241 else
11243 if (from == HI_REGS || to == HI_REGS)
11244 return 4;
11245 else
11246 return 2;
11250 /* Implement TARGET_MEMORY_MOVE_COST. */
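/* For example, a 32-bit target pays a flat 10; a Thumb-1 target pays 8 for
   sub-word modes and twice the mode size in bytes otherwise, doubled again
   when the class is not LO_REGS (e.g. DImode in HI_REGS costs 2 * 8 * 2 = 32).  */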
11253 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11254 bool in ATTRIBUTE_UNUSED)
11256 if (TARGET_32BIT)
11257 return 10;
11258 else
11260 if (GET_MODE_SIZE (mode) < 4)
11261 return 8;
11262 else
11263 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11267 /* Vectorizer cost model implementation. */
11269 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11270 static int
11271 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11272 tree vectype,
11273 int misalign ATTRIBUTE_UNUSED)
11275 unsigned elements;
11277 switch (type_of_cost)
11279 case scalar_stmt:
11280 return current_tune->vec_costs->scalar_stmt_cost;
11282 case scalar_load:
11283 return current_tune->vec_costs->scalar_load_cost;
11285 case scalar_store:
11286 return current_tune->vec_costs->scalar_store_cost;
11288 case vector_stmt:
11289 return current_tune->vec_costs->vec_stmt_cost;
11291 case vector_load:
11292 return current_tune->vec_costs->vec_align_load_cost;
11294 case vector_store:
11295 return current_tune->vec_costs->vec_store_cost;
11297 case vec_to_scalar:
11298 return current_tune->vec_costs->vec_to_scalar_cost;
11300 case scalar_to_vec:
11301 return current_tune->vec_costs->scalar_to_vec_cost;
11303 case unaligned_load:
11304 return current_tune->vec_costs->vec_unalign_load_cost;
11306 case unaligned_store:
11307 return current_tune->vec_costs->vec_unalign_store_cost;
11309 case cond_branch_taken:
11310 return current_tune->vec_costs->cond_taken_branch_cost;
11312 case cond_branch_not_taken:
11313 return current_tune->vec_costs->cond_not_taken_branch_cost;
11315 case vec_perm:
11316 case vec_promote_demote:
11317 return current_tune->vec_costs->vec_stmt_cost;
11319 case vec_construct:
11320 elements = TYPE_VECTOR_SUBPARTS (vectype);
11321 return elements / 2 + 1;
11323 default:
11324 gcc_unreachable ();
11328 /* Implement targetm.vectorize.add_stmt_cost. */
11330 static unsigned
11331 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11332 struct _stmt_vec_info *stmt_info, int misalign,
11333 enum vect_cost_model_location where)
11335 unsigned *cost = (unsigned *) data;
11336 unsigned retval = 0;
11338 if (flag_vect_cost_model)
11340 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11341 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11343 /* Statements in an inner loop relative to the loop being
11344 vectorized are weighted more heavily. The value here is
11345 arbitrary and could potentially be improved with analysis. */
11346 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11347 count *= 50; /* FIXME. */
11349 retval = (unsigned) (count * stmt_cost);
11350 cost[where] += retval;
11353 return retval;
11356 /* Return true if and only if this insn can dual-issue only as older. */
11357 static bool
11358 cortexa7_older_only (rtx insn)
11360 if (recog_memoized (insn) < 0)
11361 return false;
11363 switch (get_attr_type (insn))
11365 case TYPE_ALU_REG:
11366 case TYPE_ALUS_REG:
11367 case TYPE_LOGIC_REG:
11368 case TYPE_LOGICS_REG:
11369 case TYPE_ADC_REG:
11370 case TYPE_ADCS_REG:
11371 case TYPE_ADR:
11372 case TYPE_BFM:
11373 case TYPE_REV:
11374 case TYPE_MVN_REG:
11375 case TYPE_SHIFT_IMM:
11376 case TYPE_SHIFT_REG:
11377 case TYPE_LOAD_BYTE:
11378 case TYPE_LOAD1:
11379 case TYPE_STORE1:
11380 case TYPE_FFARITHS:
11381 case TYPE_FADDS:
11382 case TYPE_FFARITHD:
11383 case TYPE_FADDD:
11384 case TYPE_FMOV:
11385 case TYPE_F_CVT:
11386 case TYPE_FCMPS:
11387 case TYPE_FCMPD:
11388 case TYPE_FCONSTS:
11389 case TYPE_FCONSTD:
11390 case TYPE_FMULS:
11391 case TYPE_FMACS:
11392 case TYPE_FMULD:
11393 case TYPE_FMACD:
11394 case TYPE_FDIVS:
11395 case TYPE_FDIVD:
11396 case TYPE_F_MRC:
11397 case TYPE_F_MRRC:
11398 case TYPE_F_FLAG:
11399 case TYPE_F_LOADS:
11400 case TYPE_F_STORES:
11401 return true;
11402 default:
11403 return false;
11407 /* Return true if and only if this insn can dual-issue as younger. */
11408 static bool
11409 cortexa7_younger (FILE *file, int verbose, rtx insn)
11411 if (recog_memoized (insn) < 0)
11413 if (verbose > 5)
11414 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11415 return false;
11418 switch (get_attr_type (insn))
11420 case TYPE_ALU_IMM:
11421 case TYPE_ALUS_IMM:
11422 case TYPE_LOGIC_IMM:
11423 case TYPE_LOGICS_IMM:
11424 case TYPE_EXTEND:
11425 case TYPE_MVN_IMM:
11426 case TYPE_MOV_IMM:
11427 case TYPE_MOV_REG:
11428 case TYPE_MOV_SHIFT:
11429 case TYPE_MOV_SHIFT_REG:
11430 case TYPE_BRANCH:
11431 case TYPE_CALL:
11432 return true;
11433 default:
11434 return false;
11439 /* Look for an instruction that can dual issue only as an older
11440 instruction, and move it in front of any instructions that can
11441 dual-issue as younger, while preserving the relative order of all
11442 other instructions in the ready list. This is a heuristic to help
11443 dual-issue in later cycles, by postponing issue of more flexible
11444 instructions. This heuristic may affect dual issue opportunities
11445 in the current cycle. */
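/* As an illustration (hypothetical ready list, head first): given
   { mov r0, #1 ; adds r1, r2, r3 ; ldr r4, [r5] }, the ADDS (older-only)
   is hoisted ahead of the MOV (younger), giving
   { adds r1, r2, r3 ; mov r0, #1 ; ldr r4, [r5] }.  */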
11446 static void
11447 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11448 int clock)
11450 int i;
11451 int first_older_only = -1, first_younger = -1;
11453 if (verbose > 5)
11454 fprintf (file,
11455 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11456 clock,
11457 *n_readyp);
11459 /* Traverse the ready list from the head (the instruction to issue
11460 first), and looking for the first instruction that can issue as
11461 younger and the first instruction that can dual-issue only as
11462 older. */
11463 for (i = *n_readyp - 1; i >= 0; i--)
11465 rtx insn = ready[i];
11466 if (cortexa7_older_only (insn))
11468 first_older_only = i;
11469 if (verbose > 5)
11470 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11471 break;
11473 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11474 first_younger = i;
11477 /* Nothing to reorder because either no younger insn found or insn
11478 that can dual-issue only as older appears before any insn that
11479 can dual-issue as younger. */
11480 if (first_younger == -1)
11482 if (verbose > 5)
11483 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11484 return;
11487 /* Nothing to reorder because no older-only insn in the ready list. */
11488 if (first_older_only == -1)
11490 if (verbose > 5)
11491 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11492 return;
11495 /* Move first_older_only insn before first_younger. */
11496 if (verbose > 5)
11497 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11498 INSN_UID(ready [first_older_only]),
11499 INSN_UID(ready [first_younger]));
11500 rtx first_older_only_insn = ready [first_older_only];
11501 for (i = first_older_only; i < first_younger; i++)
11503 ready[i] = ready[i+1];
11506 ready[i] = first_older_only_insn;
11507 return;
11510 /* Implement TARGET_SCHED_REORDER. */
11511 static int
11512 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11513 int clock)
11515 switch (arm_tune)
11517 case cortexa7:
11518 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11519 break;
11520 default:
11521 /* Do nothing for other cores. */
11522 break;
11525 return arm_issue_rate ();
11528 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11529 It corrects the value of COST based on the relationship between
11530 INSN and DEP through the dependence LINK. It returns the new
11531 value. There is a per-core adjust_cost hook to adjust scheduler costs
11532 and the per-core hook can choose to completely override the generic
11533 adjust_cost function. Only put bits of code into arm_adjust_cost that
11534 are common across all cores. */
11535 static int
11536 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11538 rtx i_pat, d_pat;
11540 /* When generating Thumb-1 code, we want to place flag-setting operations
11541 close to a conditional branch which depends on them, so that we can
11542 omit the comparison. */
11543 if (TARGET_THUMB1
11544 && REG_NOTE_KIND (link) == 0
11545 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11546 && recog_memoized (dep) >= 0
11547 && get_attr_conds (dep) == CONDS_SET)
11548 return 0;
11550 if (current_tune->sched_adjust_cost != NULL)
11552 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11553 return cost;
11556 /* XXX Is this strictly true? */
11557 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11558 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11559 return 0;
11561 /* Call insns don't incur a stall, even if they follow a load. */
11562 if (REG_NOTE_KIND (link) == 0
11563 && CALL_P (insn))
11564 return 1;
11566 if ((i_pat = single_set (insn)) != NULL
11567 && MEM_P (SET_SRC (i_pat))
11568 && (d_pat = single_set (dep)) != NULL
11569 && MEM_P (SET_DEST (d_pat)))
11571 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11572 /* This is a load after a store; there is no conflict if the load reads
11573 from a cached area. Assume that loads from the stack and from the
11574 constant pool are cached, and that others will miss. This is a
11575 hack. */
11577 if ((GET_CODE (src_mem) == SYMBOL_REF
11578 && CONSTANT_POOL_ADDRESS_P (src_mem))
11579 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11580 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11581 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11582 return 1;
11585 return cost;
11589 arm_max_conditional_execute (void)
11591 return max_insns_skipped;
11594 static int
11595 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11597 if (TARGET_32BIT)
11598 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11599 else
11600 return (optimize > 0) ? 2 : 0;
11603 static int
11604 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11606 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11609 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11610 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11611 sequences of non-executed instructions in IT blocks probably take the same
11612 amount of time as executed instructions (and the IT instruction itself takes
11613 space in icache). This function was experimentally determined to give good
11614 results on a popular embedded benchmark. */
11616 static int
11617 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11619 return (TARGET_32BIT && speed_p) ? 1
11620 : arm_default_branch_cost (speed_p, predictable_p);
11623 static bool fp_consts_inited = false;
11625 static REAL_VALUE_TYPE value_fp0;
11627 static void
11628 init_fp_table (void)
11630 REAL_VALUE_TYPE r;
11632 r = REAL_VALUE_ATOF ("0", DFmode);
11633 value_fp0 = r;
11634 fp_consts_inited = true;
11637 /* Return TRUE if rtx X is a valid immediate FP constant. */
11639 arm_const_double_rtx (rtx x)
11641 REAL_VALUE_TYPE r;
11643 if (!fp_consts_inited)
11644 init_fp_table ();
11646 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11647 if (REAL_VALUE_MINUS_ZERO (r))
11648 return 0;
11650 if (REAL_VALUES_EQUAL (r, value_fp0))
11651 return 1;
11653 return 0;
11656 /* VFPv3 has a fairly wide range of representable immediates, formed from
11657 "quarter-precision" floating-point values. These can be evaluated using this
11658 formula (with ^ for exponentiation):
11660 -1^s * n * 2^-r
11662 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11663 16 <= n <= 31 and 0 <= r <= 7.
11665 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11667 - A (most-significant) is the sign bit.
11668 - BCD are the exponent (encoded as r XOR 3).
11669 - EFGH are the mantissa (encoded as n - 16).
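/* For example, 1.0 = 16 * 2^-4, so s = 0, n = 16 and r = 4; this encodes as
   A = 0, BCD = (4 XOR 3) = 0b111 and EFGH = (16 - 16) = 0b0000, i.e. the
   index 0x70 returned by vfp3_const_double_index below.  */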
11672 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11673 fconst[sd] instruction, or -1 if X isn't suitable. */
11674 static int
11675 vfp3_const_double_index (rtx x)
11677 REAL_VALUE_TYPE r, m;
11678 int sign, exponent;
11679 unsigned HOST_WIDE_INT mantissa, mant_hi;
11680 unsigned HOST_WIDE_INT mask;
11681 HOST_WIDE_INT m1, m2;
11682 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11684 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11685 return -1;
11687 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11689 /* We can't represent these things, so detect them first. */
11690 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11691 return -1;
11693 /* Extract sign, exponent and mantissa. */
11694 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11695 r = real_value_abs (&r);
11696 exponent = REAL_EXP (&r);
11697 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11698 highest (sign) bit, with a fixed binary point at bit point_pos.
11699 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11700 bits for the mantissa, this may fail (low bits would be lost). */
11701 real_ldexp (&m, &r, point_pos - exponent);
11702 REAL_VALUE_TO_INT (&m1, &m2, m);
11703 mantissa = m1;
11704 mant_hi = m2;
11706 /* If there are bits set in the low part of the mantissa, we can't
11707 represent this value. */
11708 if (mantissa != 0)
11709 return -1;
11711 /* Now make it so that mantissa contains the most-significant bits, and move
11712 the point_pos to indicate that the least-significant bits have been
11713 discarded. */
11714 point_pos -= HOST_BITS_PER_WIDE_INT;
11715 mantissa = mant_hi;
11717 /* We can permit four significant bits of mantissa only, plus a high bit
11718 which is always 1. */
11719 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11720 if ((mantissa & mask) != 0)
11721 return -1;
11723 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11724 mantissa >>= point_pos - 5;
11726 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11727 floating-point immediate zero with Neon using an integer-zero load, but
11728 that case is handled elsewhere.) */
11729 if (mantissa == 0)
11730 return -1;
11732 gcc_assert (mantissa >= 16 && mantissa <= 31);
11734 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11735 normalized significands are in the range [1, 2). (Our mantissa is shifted
11736 left 4 places at this point relative to normalized IEEE754 values). GCC
11737 internally uses [0.5, 1) (see real.c), so the exponent returned from
11738 REAL_EXP must be altered. */
11739 exponent = 5 - exponent;
11741 if (exponent < 0 || exponent > 7)
11742 return -1;
11744 /* Sign, mantissa and exponent are now in the correct form to plug into the
11745 formula described in the comment above. */
11746 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
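/* Illustrative sketch (not part of the compiler): decode the 8-bit ABCDEFGH
   index described above back into its value, assuming only the documented
   formula -1^s * n * 2^-r with A = sign, BCD = r XOR 3 and EFGH = n - 16.
   For example, index 0x70 gives s = 0, r = 4, n = 16, i.e. 16 * 2^-4 = 1.0,
   which is the index the routine above computes for the constant 1.0.  */

static double
vfp3_decode_index_example (unsigned int idx)
{
  int s = (idx >> 7) & 1;
  int r = ((idx >> 4) & 7) ^ 3;	/* BCD holds r XOR 3, so 0 <= r <= 7.  */
  int n = (idx & 15) + 16;	/* EFGH holds n - 16, so 16 <= n <= 31.  */
  return (s ? -1.0 : 1.0) * (double) n / (double) (1 << r);
}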
11749 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11751 vfp3_const_double_rtx (rtx x)
11753 if (!TARGET_VFP3)
11754 return 0;
11756 return vfp3_const_double_index (x) != -1;
11759 /* Recognize immediates which can be used in various Neon instructions. Legal
11760 immediates are described by the following table (for VMVN variants, the
11761 bitwise inverse of the constant shown is recognized. In either case, VMOV
11762 is output and the correct instruction to use for a given constant is chosen
11763 by the assembler). The constant shown is replicated across all elements of
11764 the destination vector.
11766 insn elems variant constant (binary)
11767 ---- ----- ------- -----------------
11768 vmov i32 0 00000000 00000000 00000000 abcdefgh
11769 vmov i32 1 00000000 00000000 abcdefgh 00000000
11770 vmov i32 2 00000000 abcdefgh 00000000 00000000
11771 vmov i32 3 abcdefgh 00000000 00000000 00000000
11772 vmov i16 4 00000000 abcdefgh
11773 vmov i16 5 abcdefgh 00000000
11774 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11775 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11776 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11777 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11778 vmvn i16 10 00000000 abcdefgh
11779 vmvn i16 11 abcdefgh 00000000
11780 vmov i32 12 00000000 00000000 abcdefgh 11111111
11781 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11782 vmov i32 14 00000000 abcdefgh 11111111 11111111
11783 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11784 vmov i8 16 abcdefgh
11785 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11786 eeeeeeee ffffffff gggggggg hhhhhhhh
11787 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11788 vmov f32 19 00000000 00000000 00000000 00000000
11790 For case 18, B = !b. Representable values are exactly those accepted by
11791 vfp3_const_double_index, but are output as floating-point numbers rather
11792 than indices.
11794 For case 19, we will change it to vmov.i32 when assembling.
11796 Variants 0-5 (inclusive) may also be used as immediates for the second
11797 operand of VORR/VBIC instructions.
11799 The INVERSE argument causes the bitwise inverse of the given operand to be
11800 recognized instead (used for recognizing legal immediates for the VAND/VORN
11801 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11802 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11803 output, rather than the real insns vbic/vorr).
11805 INVERSE makes no difference to the recognition of float vectors.
11807 The return value is the variant of immediate as shown in the above table, or
11808 -1 if the given value doesn't match any of the listed patterns.
11810 static int
11811 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
11812 rtx *modconst, int *elementwidth)
11814 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11815 matches = 1; \
11816 for (i = 0; i < idx; i += (STRIDE)) \
11817 if (!(TEST)) \
11818 matches = 0; \
11819 if (matches) \
11821 immtype = (CLASS); \
11822 elsize = (ELSIZE); \
11823 break; \
11826 unsigned int i, elsize = 0, idx = 0, n_elts;
11827 unsigned int innersize;
11828 unsigned char bytes[16];
11829 int immtype = -1, matches;
11830 unsigned int invmask = inverse ? 0xff : 0;
11831 bool vector = GET_CODE (op) == CONST_VECTOR;
11833 if (vector)
11835 n_elts = CONST_VECTOR_NUNITS (op);
11836 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
11838 else
11840 n_elts = 1;
11841 if (mode == VOIDmode)
11842 mode = DImode;
11843 innersize = GET_MODE_SIZE (mode);
11846 /* Vectors of float constants. */
11847 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11849 rtx el0 = CONST_VECTOR_ELT (op, 0);
11850 REAL_VALUE_TYPE r0;
11852 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11853 return -1;
11855 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
11857 for (i = 1; i < n_elts; i++)
11859 rtx elt = CONST_VECTOR_ELT (op, i);
11860 REAL_VALUE_TYPE re;
11862 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
11864 if (!REAL_VALUES_EQUAL (r0, re))
11865 return -1;
11868 if (modconst)
11869 *modconst = CONST_VECTOR_ELT (op, 0);
11871 if (elementwidth)
11872 *elementwidth = 0;
11874 if (el0 == CONST0_RTX (GET_MODE (el0)))
11875 return 19;
11876 else
11877 return 18;
11880 /* Splat vector constant out into a byte vector. */
11881 for (i = 0; i < n_elts; i++)
11883 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11884 unsigned HOST_WIDE_INT elpart;
11885 unsigned int part, parts;
11887 if (CONST_INT_P (el))
11889 elpart = INTVAL (el);
11890 parts = 1;
11892 else if (CONST_DOUBLE_P (el))
11894 elpart = CONST_DOUBLE_LOW (el);
11895 parts = 2;
11897 else
11898 gcc_unreachable ();
11900 for (part = 0; part < parts; part++)
11902 unsigned int byte;
11903 for (byte = 0; byte < innersize; byte++)
11905 bytes[idx++] = (elpart & 0xff) ^ invmask;
11906 elpart >>= BITS_PER_UNIT;
11908 if (CONST_DOUBLE_P (el))
11909 elpart = CONST_DOUBLE_HIGH (el);
11913 /* Sanity check. */
11914 gcc_assert (idx == GET_MODE_SIZE (mode));
11918 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11919 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11921 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11922 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11924 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11925 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11927 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11928 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11930 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11932 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11934 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11935 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11937 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11938 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11940 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11941 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11943 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11944 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11946 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11948 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11950 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11951 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11953 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11954 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11956 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11957 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11959 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11960 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11962 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11964 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11965 && bytes[i] == bytes[(i + 8) % idx]);
11967 while (0);
11969 if (immtype == -1)
11970 return -1;
11972 if (elementwidth)
11973 *elementwidth = elsize;
11975 if (modconst)
11977 unsigned HOST_WIDE_INT imm = 0;
11979 /* Un-invert bytes of recognized vector, if necessary. */
11980 if (invmask != 0)
11981 for (i = 0; i < idx; i++)
11982 bytes[i] ^= invmask;
11984 if (immtype == 17)
11986 /* FIXME: Broken on 32-bit H_W_I hosts. */
11987 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11989 for (i = 0; i < 8; i++)
11990 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11991 << (i * BITS_PER_UNIT);
11993 *modconst = GEN_INT (imm);
11995 else
11997 unsigned HOST_WIDE_INT imm = 0;
11999 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12000 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12002 *modconst = GEN_INT (imm);
12006 return immtype;
12007 #undef CHECK
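/* Illustrative sketch (a hypothetical helper, not used by the compiler):
   classify a 32-bit splat value against vmov.i32 variants 0-3 of the table
   above, i.e. find the single byte position that may be non-zero.  Byte 0
   here is the least-significant byte, so 0x0000ab00 is variant 1 and
   0xab000000 is variant 3; values with more than one populated byte fall
   through to the other variants and are reported as -1 here.  */

static int
classify_vmov_i32_example (unsigned int val)
{
  int i;
  for (i = 0; i < 4; i++)
    if ((val & ~(0xffU << (i * 8))) == 0)
      return i;		/* abcdefgh occupies byte I; all other bytes are zero.  */
  return -1;		/* Not a single-byte immediate.  */
}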
12010 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12011 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12012 float elements), and a modified constant (whatever should be output for a
12013 VMOV) in *MODCONST. */
12016 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12017 rtx *modconst, int *elementwidth)
12019 rtx tmpconst;
12020 int tmpwidth;
12021 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12023 if (retval == -1)
12024 return 0;
12026 if (modconst)
12027 *modconst = tmpconst;
12029 if (elementwidth)
12030 *elementwidth = tmpwidth;
12032 return 1;
12035 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12036 the immediate is valid, write a constant suitable for using as an operand
12037 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12038 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12041 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12042 rtx *modconst, int *elementwidth)
12044 rtx tmpconst;
12045 int tmpwidth;
12046 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12048 if (retval < 0 || retval > 5)
12049 return 0;
12051 if (modconst)
12052 *modconst = tmpconst;
12054 if (elementwidth)
12055 *elementwidth = tmpwidth;
12057 return 1;
12060 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12061 the immediate is valid, write a constant suitable for using as an operand
12062 to VSHR/VSHL to *MODCONST and the corresponding element width to
12063 *ELEMENTWIDTH. ISLEFTSHIFT determines whether this is a left or a right
12064 shift, because the two kinds have different limitations.
12067 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12068 rtx *modconst, int *elementwidth,
12069 bool isleftshift)
12071 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12072 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12073 unsigned HOST_WIDE_INT last_elt = 0;
12074 unsigned HOST_WIDE_INT maxshift;
12076 /* Split vector constant out into a byte vector. */
12077 for (i = 0; i < n_elts; i++)
12079 rtx el = CONST_VECTOR_ELT (op, i);
12080 unsigned HOST_WIDE_INT elpart;
12082 if (CONST_INT_P (el))
12083 elpart = INTVAL (el);
12084 else if (CONST_DOUBLE_P (el))
12085 return 0;
12086 else
12087 gcc_unreachable ();
12089 if (i != 0 && elpart != last_elt)
12090 return 0;
12092 last_elt = elpart;
12095 /* Shift less than element size. */
12096 maxshift = innersize * 8;
12098 if (isleftshift)
12100 /* Left shift immediate value can be from 0 to <size>-1. */
12101 if (last_elt >= maxshift)
12102 return 0;
12104 else
12106 /* Right shift immediate value can be from 1 to <size>. */
12107 if (last_elt == 0 || last_elt > maxshift)
12108 return 0;
12111 if (elementwidth)
12112 *elementwidth = innersize * 8;
12114 if (modconst)
12115 *modconst = CONST_VECTOR_ELT (op, 0);
12117 return 1;
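/* Minimal sketch of the range rule applied above, assuming ELBITS is the
   element size in bits: a left-shift immediate must satisfy 0 <= n < ELBITS
   and a right-shift immediate 1 <= n <= ELBITS.  For 8-bit elements this
   means VSHL accepts #0..#7 while VSHR accepts #1..#8.  Hypothetical helper,
   for illustration only.  */

static int
neon_shift_imm_in_range_example (unsigned HOST_WIDE_INT n, unsigned int elbits,
				 bool isleftshift)
{
  return isleftshift ? n < elbits : (n >= 1 && n <= elbits);
}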
12120 /* Return a string suitable for output of Neon immediate logic operation
12121 MNEM. */
12123 char *
12124 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12125 int inverse, int quad)
12127 int width, is_valid;
12128 static char templ[40];
12130 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12132 gcc_assert (is_valid != 0);
12134 if (quad)
12135 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12136 else
12137 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12139 return templ;
12142 /* Return a string suitable for output of Neon immediate shift operation
12143 (VSHR or VSHL) MNEM. */
12145 char *
12146 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12147 enum machine_mode mode, int quad,
12148 bool isleftshift)
12150 int width, is_valid;
12151 static char templ[40];
12153 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12154 gcc_assert (is_valid != 0);
12156 if (quad)
12157 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12158 else
12159 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12161 return templ;
12164 /* Output a sequence of pairwise operations to implement a reduction.
12165 NOTE: We do "too much work" here, because pairwise operations work on two
12166 registers-worth of operands in one go. Unfortunately it does not appear
12167 possible to exploit those extra calculations to do the full operation in fewer steps.
12168 Although all vector elements of the result but the first are ignored, we
12169 actually calculate the same result in each of the elements. An alternative
12170 such as initially loading a vector with zero to use as each of the second
12171 operands would use up an additional register and take an extra instruction,
12172 for no particular gain. */
12174 void
12175 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12176 rtx (*reduc) (rtx, rtx, rtx))
12178 enum machine_mode inner = GET_MODE_INNER (mode);
12179 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12180 rtx tmpsum = op1;
12182 for (i = parts / 2; i >= 1; i /= 2)
12184 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12185 emit_insn (reduc (dest, tmpsum, tmpsum));
12186 tmpsum = dest;
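/* Scalar illustration (a sketch, not compiler code) of the scheme used
   above: with PARTS lanes, log2(PARTS) passes of adjacent-pair additions
   reduce the whole vector, halving the number of live lanes each time,
   e.g. {1, 2, 3, 4} -> {3, 7} -> {10}.  */

static int
pairwise_sum_example (int *v, int parts)
{
  int i, step;
  for (step = parts / 2; step >= 1; step /= 2)
    for (i = 0; i < step; i++)
      v[i] = v[2 * i] + v[2 * i + 1];	/* vpadd-style adjacent pairing.  */
  return v[0];
}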
12190 /* If VALS is a vector constant that can be loaded into a register
12191 using VDUP, generate instructions to do so and return an RTX to
12192 assign to the register. Otherwise return NULL_RTX. */
12194 static rtx
12195 neon_vdup_constant (rtx vals)
12197 enum machine_mode mode = GET_MODE (vals);
12198 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12199 int n_elts = GET_MODE_NUNITS (mode);
12200 bool all_same = true;
12201 rtx x;
12202 int i;
12204 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12205 return NULL_RTX;
12207 for (i = 0; i < n_elts; ++i)
12209 x = XVECEXP (vals, 0, i);
12210 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12211 all_same = false;
12214 if (!all_same)
12215 /* The elements are not all the same. We could handle repeating
12216 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12217 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12218 vdup.i16). */
12219 return NULL_RTX;
12221 /* We can load this constant by using VDUP and a constant in a
12222 single ARM register. This will be cheaper than a vector
12223 load. */
12225 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12226 return gen_rtx_VEC_DUPLICATE (mode, x);
12229 /* Generate code to load VALS, which is a PARALLEL containing only
12230 constants (for vec_init) or CONST_VECTOR, efficiently into a
12231 register. Returns an RTX to copy into the register, or NULL_RTX
12232 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12235 neon_make_constant (rtx vals)
12237 enum machine_mode mode = GET_MODE (vals);
12238 rtx target;
12239 rtx const_vec = NULL_RTX;
12240 int n_elts = GET_MODE_NUNITS (mode);
12241 int n_const = 0;
12242 int i;
12244 if (GET_CODE (vals) == CONST_VECTOR)
12245 const_vec = vals;
12246 else if (GET_CODE (vals) == PARALLEL)
12248 /* A CONST_VECTOR must contain only CONST_INTs and
12249 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12250 Only store valid constants in a CONST_VECTOR. */
12251 for (i = 0; i < n_elts; ++i)
12253 rtx x = XVECEXP (vals, 0, i);
12254 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12255 n_const++;
12257 if (n_const == n_elts)
12258 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12260 else
12261 gcc_unreachable ();
12263 if (const_vec != NULL
12264 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12265 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12266 return const_vec;
12267 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12268 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12269 pipeline cycle; creating the constant takes one or two ARM
12270 pipeline cycles. */
12271 return target;
12272 else if (const_vec != NULL_RTX)
12273 /* Load from constant pool. On Cortex-A8 this takes two cycles
12274 (for either double or quad vectors). We cannot take advantage
12275 of single-cycle VLD1 because we need a PC-relative addressing
12276 mode. */
12277 return const_vec;
12278 else
12279 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12280 We cannot construct an initializer. */
12281 return NULL_RTX;
12284 /* Initialize vector TARGET to VALS. */
12286 void
12287 neon_expand_vector_init (rtx target, rtx vals)
12289 enum machine_mode mode = GET_MODE (target);
12290 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12291 int n_elts = GET_MODE_NUNITS (mode);
12292 int n_var = 0, one_var = -1;
12293 bool all_same = true;
12294 rtx x, mem;
12295 int i;
12297 for (i = 0; i < n_elts; ++i)
12299 x = XVECEXP (vals, 0, i);
12300 if (!CONSTANT_P (x))
12301 ++n_var, one_var = i;
12303 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12304 all_same = false;
12307 if (n_var == 0)
12309 rtx constant = neon_make_constant (vals);
12310 if (constant != NULL_RTX)
12312 emit_move_insn (target, constant);
12313 return;
12317 /* Splat a single non-constant element if we can. */
12318 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12320 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12321 emit_insn (gen_rtx_SET (VOIDmode, target,
12322 gen_rtx_VEC_DUPLICATE (mode, x)));
12323 return;
12326 /* One field is non-constant. Load constant then overwrite varying
12327 field. This is more efficient than using the stack. */
12328 if (n_var == 1)
12330 rtx copy = copy_rtx (vals);
12331 rtx index = GEN_INT (one_var);
12333 /* Load constant part of vector, substitute neighboring value for
12334 varying element. */
12335 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12336 neon_expand_vector_init (target, copy);
12338 /* Insert variable. */
12339 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12340 switch (mode)
12342 case V8QImode:
12343 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12344 break;
12345 case V16QImode:
12346 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12347 break;
12348 case V4HImode:
12349 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12350 break;
12351 case V8HImode:
12352 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12353 break;
12354 case V2SImode:
12355 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12356 break;
12357 case V4SImode:
12358 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12359 break;
12360 case V2SFmode:
12361 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12362 break;
12363 case V4SFmode:
12364 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12365 break;
12366 case V2DImode:
12367 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12368 break;
12369 default:
12370 gcc_unreachable ();
12372 return;
12375 /* Construct the vector in memory one field at a time
12376 and load the whole vector. */
12377 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12378 for (i = 0; i < n_elts; i++)
12379 emit_move_insn (adjust_address_nv (mem, inner_mode,
12380 i * GET_MODE_SIZE (inner_mode)),
12381 XVECEXP (vals, 0, i));
12382 emit_move_insn (target, mem);
12385 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12386 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12387 reported source locations are bogus. */
12389 static void
12390 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12391 const char *err)
12393 HOST_WIDE_INT lane;
12395 gcc_assert (CONST_INT_P (operand));
12397 lane = INTVAL (operand);
12399 if (lane < low || lane >= high)
12400 error (err);
12403 /* Bounds-check lanes. */
12405 void
12406 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12408 bounds_check (operand, low, high, "lane out of range");
12411 /* Bounds-check constants. */
12413 void
12414 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12416 bounds_check (operand, low, high, "constant out of range");
12419 HOST_WIDE_INT
12420 neon_element_bits (enum machine_mode mode)
12422 if (mode == DImode)
12423 return GET_MODE_BITSIZE (mode);
12424 else
12425 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12429 /* Predicates for `match_operand' and `match_operator'. */
12431 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12432 WB is true if full writeback address modes are allowed and is false
12433 if limited writeback address modes (POST_INC and PRE_DEC) are
12434 allowed. */
12437 arm_coproc_mem_operand (rtx op, bool wb)
12439 rtx ind;
12441 /* Reject eliminable registers. */
12442 if (! (reload_in_progress || reload_completed || lra_in_progress)
12443 && ( reg_mentioned_p (frame_pointer_rtx, op)
12444 || reg_mentioned_p (arg_pointer_rtx, op)
12445 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12446 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12447 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12448 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12449 return FALSE;
12451 /* Constants are converted into offsets from labels. */
12452 if (!MEM_P (op))
12453 return FALSE;
12455 ind = XEXP (op, 0);
12457 if (reload_completed
12458 && (GET_CODE (ind) == LABEL_REF
12459 || (GET_CODE (ind) == CONST
12460 && GET_CODE (XEXP (ind, 0)) == PLUS
12461 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12462 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12463 return TRUE;
12465 /* Match: (mem (reg)). */
12466 if (REG_P (ind))
12467 return arm_address_register_rtx_p (ind, 0);
12469 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12470 acceptable in any case (subject to verification by
12471 arm_address_register_rtx_p). We need WB to be true to accept
12472 PRE_INC and POST_DEC. */
12473 if (GET_CODE (ind) == POST_INC
12474 || GET_CODE (ind) == PRE_DEC
12475 || (wb
12476 && (GET_CODE (ind) == PRE_INC
12477 || GET_CODE (ind) == POST_DEC)))
12478 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12480 if (wb
12481 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12482 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12483 && GET_CODE (XEXP (ind, 1)) == PLUS
12484 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12485 ind = XEXP (ind, 1);
12487 /* Match:
12488 (plus (reg)
12489 (const)). */
12490 if (GET_CODE (ind) == PLUS
12491 && REG_P (XEXP (ind, 0))
12492 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12493 && CONST_INT_P (XEXP (ind, 1))
12494 && INTVAL (XEXP (ind, 1)) > -1024
12495 && INTVAL (XEXP (ind, 1)) < 1024
12496 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12497 return TRUE;
12499 return FALSE;
12502 /* Return TRUE if OP is a memory operand which we can load or store a vector
12503 to/from. TYPE is one of the following values:
12504 0 - Vector load/store (vldr)
12505 1 - Core registers (ldm)
12506 2 - Element/structure loads (vld1)
12509 neon_vector_mem_operand (rtx op, int type, bool strict)
12511 rtx ind;
12513 /* Reject eliminable registers. */
12514 if (! (reload_in_progress || reload_completed)
12515 && ( reg_mentioned_p (frame_pointer_rtx, op)
12516 || reg_mentioned_p (arg_pointer_rtx, op)
12517 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12518 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12519 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12520 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12521 return !strict;
12523 /* Constants are converted into offsets from labels. */
12524 if (!MEM_P (op))
12525 return FALSE;
12527 ind = XEXP (op, 0);
12529 if (reload_completed
12530 && (GET_CODE (ind) == LABEL_REF
12531 || (GET_CODE (ind) == CONST
12532 && GET_CODE (XEXP (ind, 0)) == PLUS
12533 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12534 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12535 return TRUE;
12537 /* Match: (mem (reg)). */
12538 if (REG_P (ind))
12539 return arm_address_register_rtx_p (ind, 0);
12541 /* Allow post-increment with Neon registers. */
12542 if ((type != 1 && GET_CODE (ind) == POST_INC)
12543 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12544 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12546 /* FIXME: vld1 allows register post-modify. */
12548 /* Match:
12549 (plus (reg)
12550 (const)). */
12551 if (type == 0
12552 && GET_CODE (ind) == PLUS
12553 && REG_P (XEXP (ind, 0))
12554 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12555 && CONST_INT_P (XEXP (ind, 1))
12556 && INTVAL (XEXP (ind, 1)) > -1024
12557 /* For quad modes, we restrict the constant offset to be slightly less
12558 than what the instruction format permits. We have no such constraint
12559 on double mode offsets. (This must match arm_legitimate_index_p.) */
12560 && (INTVAL (XEXP (ind, 1))
12561 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12562 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12563 return TRUE;
12565 return FALSE;
12568 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12569 type. */
12571 neon_struct_mem_operand (rtx op)
12573 rtx ind;
12575 /* Reject eliminable registers. */
12576 if (! (reload_in_progress || reload_completed)
12577 && ( reg_mentioned_p (frame_pointer_rtx, op)
12578 || reg_mentioned_p (arg_pointer_rtx, op)
12579 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12580 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12581 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12582 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12583 return FALSE;
12585 /* Constants are converted into offsets from labels. */
12586 if (!MEM_P (op))
12587 return FALSE;
12589 ind = XEXP (op, 0);
12591 if (reload_completed
12592 && (GET_CODE (ind) == LABEL_REF
12593 || (GET_CODE (ind) == CONST
12594 && GET_CODE (XEXP (ind, 0)) == PLUS
12595 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12596 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12597 return TRUE;
12599 /* Match: (mem (reg)). */
12600 if (REG_P (ind))
12601 return arm_address_register_rtx_p (ind, 0);
12603 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12604 if (GET_CODE (ind) == POST_INC
12605 || GET_CODE (ind) == PRE_DEC)
12606 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12608 return FALSE;
12611 /* Return true if X is a register that will be eliminated later on. */
12613 arm_eliminable_register (rtx x)
12615 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12616 || REGNO (x) == ARG_POINTER_REGNUM
12617 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12618 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12621 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12622 coprocessor registers. Otherwise return NO_REGS. */
12624 enum reg_class
12625 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12627 if (mode == HFmode)
12629 if (!TARGET_NEON_FP16)
12630 return GENERAL_REGS;
12631 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12632 return NO_REGS;
12633 return GENERAL_REGS;
12636 /* The neon move patterns handle all legitimate vector and struct
12637 addresses. */
12638 if (TARGET_NEON
12639 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12640 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12641 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12642 || VALID_NEON_STRUCT_MODE (mode)))
12643 return NO_REGS;
12645 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12646 return NO_REGS;
12648 return GENERAL_REGS;
12651 /* Values which must be returned in the most-significant end of the return
12652 register. */
12654 static bool
12655 arm_return_in_msb (const_tree valtype)
12657 return (TARGET_AAPCS_BASED
12658 && BYTES_BIG_ENDIAN
12659 && (AGGREGATE_TYPE_P (valtype)
12660 || TREE_CODE (valtype) == COMPLEX_TYPE
12661 || FIXED_POINT_TYPE_P (valtype)));
12664 /* Return TRUE if X references a SYMBOL_REF. */
12666 symbol_mentioned_p (rtx x)
12668 const char * fmt;
12669 int i;
12671 if (GET_CODE (x) == SYMBOL_REF)
12672 return 1;
12674 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12675 are constant offsets, not symbols. */
12676 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12677 return 0;
12679 fmt = GET_RTX_FORMAT (GET_CODE (x));
12681 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12683 if (fmt[i] == 'E')
12685 int j;
12687 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12688 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12689 return 1;
12691 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12692 return 1;
12695 return 0;
12698 /* Return TRUE if X references a LABEL_REF. */
12700 label_mentioned_p (rtx x)
12702 const char * fmt;
12703 int i;
12705 if (GET_CODE (x) == LABEL_REF)
12706 return 1;
12708 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12709 instruction, but they are constant offsets, not symbols. */
12710 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12711 return 0;
12713 fmt = GET_RTX_FORMAT (GET_CODE (x));
12714 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12716 if (fmt[i] == 'E')
12718 int j;
12720 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12721 if (label_mentioned_p (XVECEXP (x, i, j)))
12722 return 1;
12724 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12725 return 1;
12728 return 0;
12732 tls_mentioned_p (rtx x)
12734 switch (GET_CODE (x))
12736 case CONST:
12737 return tls_mentioned_p (XEXP (x, 0));
12739 case UNSPEC:
12740 if (XINT (x, 1) == UNSPEC_TLS)
12741 return 1;
12743 default:
12744 return 0;
12748 /* Must not copy any rtx that uses a pc-relative address. */
12750 static int
12751 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
12753 if (GET_CODE (*x) == UNSPEC
12754 && (XINT (*x, 1) == UNSPEC_PIC_BASE
12755 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
12756 return 1;
12757 return 0;
12760 static bool
12761 arm_cannot_copy_insn_p (rtx insn)
12763 /* The tls call insn cannot be copied, as it is paired with a data
12764 word. */
12765 if (recog_memoized (insn) == CODE_FOR_tlscall)
12766 return true;
12768 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
12771 enum rtx_code
12772 minmax_code (rtx x)
12774 enum rtx_code code = GET_CODE (x);
12776 switch (code)
12778 case SMAX:
12779 return GE;
12780 case SMIN:
12781 return LE;
12782 case UMIN:
12783 return LEU;
12784 case UMAX:
12785 return GEU;
12786 default:
12787 gcc_unreachable ();
12791 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12793 bool
12794 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12795 int *mask, bool *signed_sat)
12797 /* The high bound must be a power of two minus one. */
12798 int log = exact_log2 (INTVAL (hi_bound) + 1);
12799 if (log == -1)
12800 return false;
12802 /* The low bound is either zero (for usat) or one less than the
12803 negation of the high bound (for ssat). */
12804 if (INTVAL (lo_bound) == 0)
12806 if (mask)
12807 *mask = log;
12808 if (signed_sat)
12809 *signed_sat = false;
12811 return true;
12814 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12816 if (mask)
12817 *mask = log + 1;
12818 if (signed_sat)
12819 *signed_sat = true;
12821 return true;
12824 return false;
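/* Worked example of the matching rule above (illustrative only): clamping
   to [0, 255] matches USAT with *MASK = 8, since 255 == 2^8 - 1 and the
   low bound is zero; clamping to [-128, 127] matches SSAT with *MASK = 8,
   since 127 == 2^7 - 1 and -128 == -127 - 1, so *MASK becomes log + 1.  */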
12827 /* Return 1 if memory locations are adjacent. */
12829 adjacent_mem_locations (rtx a, rtx b)
12831 /* We don't guarantee to preserve the order of these memory refs. */
12832 if (volatile_refs_p (a) || volatile_refs_p (b))
12833 return 0;
12835 if ((REG_P (XEXP (a, 0))
12836 || (GET_CODE (XEXP (a, 0)) == PLUS
12837 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12838 && (REG_P (XEXP (b, 0))
12839 || (GET_CODE (XEXP (b, 0)) == PLUS
12840 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12842 HOST_WIDE_INT val0 = 0, val1 = 0;
12843 rtx reg0, reg1;
12844 int val_diff;
12846 if (GET_CODE (XEXP (a, 0)) == PLUS)
12848 reg0 = XEXP (XEXP (a, 0), 0);
12849 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12851 else
12852 reg0 = XEXP (a, 0);
12854 if (GET_CODE (XEXP (b, 0)) == PLUS)
12856 reg1 = XEXP (XEXP (b, 0), 0);
12857 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12859 else
12860 reg1 = XEXP (b, 0);
12862 /* Don't accept any offset that will require multiple
12863 instructions to handle, since this would cause the
12864 arith_adjacentmem pattern to output an overlong sequence. */
12865 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12866 return 0;
12868 /* Don't allow an eliminable register: register elimination can make
12869 the offset too large. */
12870 if (arm_eliminable_register (reg0))
12871 return 0;
12873 val_diff = val1 - val0;
12875 if (arm_ld_sched)
12877 /* If the target has load delay slots, then there's no benefit
12878 to using an ldm instruction unless the offset is zero and
12879 we are optimizing for size. */
12880 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12881 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12882 && (val_diff == 4 || val_diff == -4));
12885 return ((REGNO (reg0) == REGNO (reg1))
12886 && (val_diff == 4 || val_diff == -4));
12889 return 0;
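/* Example of the rule above (illustrative): on a target without load
   scheduling, (mem (plus r4 4)) and (mem (plus r4 8)) are adjacent (same
   base register, offsets differing by exactly 4), whereas (mem r4) and
   (mem (plus r5 4)) are not, because the base registers differ.  */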
12892 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12893 for load operations, false for store operations. CONSECUTIVE is true
12894 if the register numbers in the operation must be consecutive in the register
12895 bank. RETURN_PC is true if the value is to be loaded into the PC.
12896 The pattern we are trying to match for load is:
12897 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12898 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12901 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12903 where
12904 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12905 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12906 3. If consecutive is TRUE, then for kth register being loaded,
12907 REGNO (R_dk) = REGNO (R_d0) + k.
12908 The pattern for store is similar. */
12909 bool
12910 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
12911 bool consecutive, bool return_pc)
12913 HOST_WIDE_INT count = XVECLEN (op, 0);
12914 rtx reg, mem, addr;
12915 unsigned regno;
12916 unsigned first_regno;
12917 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12918 rtx elt;
12919 bool addr_reg_in_reglist = false;
12920 bool update = false;
12921 int reg_increment;
12922 int offset_adj;
12923 int regs_per_val;
12925 /* If not in SImode, then registers must be consecutive
12926 (e.g., VLDM instructions for DFmode). */
12927 gcc_assert ((mode == SImode) || consecutive);
12928 /* Setting return_pc for stores is illegal. */
12929 gcc_assert (!return_pc || load);
12931 /* Set up the increments and the regs per val based on the mode. */
12932 reg_increment = GET_MODE_SIZE (mode);
12933 regs_per_val = reg_increment / 4;
12934 offset_adj = return_pc ? 1 : 0;
12936 if (count <= 1
12937 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12938 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12939 return false;
12941 /* Check if this is a write-back. */
12942 elt = XVECEXP (op, 0, offset_adj);
12943 if (GET_CODE (SET_SRC (elt)) == PLUS)
12945 i++;
12946 base = 1;
12947 update = true;
12949 /* The offset adjustment must be the number of registers being
12950 popped times the size of a single register. */
12951 if (!REG_P (SET_DEST (elt))
12952 || !REG_P (XEXP (SET_SRC (elt), 0))
12953 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12954 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12955 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12956 ((count - 1 - offset_adj) * reg_increment))
12957 return false;
12960 i = i + offset_adj;
12961 base = base + offset_adj;
12962 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12963 success depends on the type: VLDM can do just one reg,
12964 LDM must do at least two. */
12965 if ((count <= i) && (mode == SImode))
12966 return false;
12968 elt = XVECEXP (op, 0, i - 1);
12969 if (GET_CODE (elt) != SET)
12970 return false;
12972 if (load)
12974 reg = SET_DEST (elt);
12975 mem = SET_SRC (elt);
12977 else
12979 reg = SET_SRC (elt);
12980 mem = SET_DEST (elt);
12983 if (!REG_P (reg) || !MEM_P (mem))
12984 return false;
12986 regno = REGNO (reg);
12987 first_regno = regno;
12988 addr = XEXP (mem, 0);
12989 if (GET_CODE (addr) == PLUS)
12991 if (!CONST_INT_P (XEXP (addr, 1)))
12992 return false;
12994 offset = INTVAL (XEXP (addr, 1));
12995 addr = XEXP (addr, 0);
12998 if (!REG_P (addr))
12999 return false;
13001 /* Don't allow SP to be loaded unless it is also the base register. It
13002 guarantees that SP is reset correctly when an LDM instruction
13003 is interrupted. Otherwise, we might end up with a corrupt stack. */
13004 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13005 return false;
13007 for (; i < count; i++)
13009 elt = XVECEXP (op, 0, i);
13010 if (GET_CODE (elt) != SET)
13011 return false;
13013 if (load)
13015 reg = SET_DEST (elt);
13016 mem = SET_SRC (elt);
13018 else
13020 reg = SET_SRC (elt);
13021 mem = SET_DEST (elt);
13024 if (!REG_P (reg)
13025 || GET_MODE (reg) != mode
13026 || REGNO (reg) <= regno
13027 || (consecutive
13028 && (REGNO (reg) !=
13029 (unsigned int) (first_regno + regs_per_val * (i - base))))
13030 /* Don't allow SP to be loaded unless it is also the base register. It
13031 guarantees that SP is reset correctly when an LDM instruction
13032 is interrupted. Otherwise, we might end up with a corrupt stack. */
13033 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13034 || !MEM_P (mem)
13035 || GET_MODE (mem) != mode
13036 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13037 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13038 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13039 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13040 offset + (i - base) * reg_increment))
13041 && (!REG_P (XEXP (mem, 0))
13042 || offset + (i - base) * reg_increment != 0)))
13043 return false;
13045 regno = REGNO (reg);
13046 if (regno == REGNO (addr))
13047 addr_reg_in_reglist = true;
13050 if (load)
13052 if (update && addr_reg_in_reglist)
13053 return false;
13055 /* For Thumb-1, the address register is always modified, either by write-back
13056 or by an explicit load. If the pattern does not describe an update,
13057 then the address register must be in the list of loaded registers. */
13058 if (TARGET_THUMB1)
13059 return update || addr_reg_in_reglist;
13062 return true;
13065 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13066 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13067 instruction. ADD_OFFSET is nonzero if the base address register needs
13068 to be modified with an add instruction before we can use it. */
13070 static bool
13071 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13072 int nops, HOST_WIDE_INT add_offset)
13074 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13075 if the offset isn't small enough. The reason 2 ldrs are faster
13076 is because these ARMs are able to do more than one cache access
13077 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13078 whilst the ARM8 has a double bandwidth cache. This means that
13079 these cores can do both an instruction fetch and a data fetch in
13080 a single cycle, so the trick of calculating the address into a
13081 scratch register (one of the result regs) and then doing a load
13082 multiple actually becomes slower (and no smaller in code size).
13083 That is the transformation
13085 ldr rd1, [rbase + offset]
13086 ldr rd2, [rbase + offset + 4]
13090 add rd1, rbase, offset
13091 ldmia rd1, {rd1, rd2}
13093 produces worse code -- '3 cycles + any stalls on rd2' instead of
13094 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13095 access per cycle, the first sequence could never complete in less
13096 than 6 cycles, whereas the ldm sequence would only take 5 and
13097 would make better use of sequential accesses if not hitting the
13098 cache.
13100 We cheat here and test 'arm_ld_sched' which we currently know to
13101 only be true for the ARM8, ARM9 and StrongARM. If this ever
13102 changes, then the test below needs to be reworked. */
13103 if (nops == 2 && arm_ld_sched && add_offset != 0)
13104 return false;
13106 /* XScale has load-store double instructions, but they have stricter
13107 alignment requirements than load-store multiple, so we cannot
13108 use them.
13110 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13111 the pipeline until completion.
13113 NREGS CYCLES
13114 1 3
13115 2 4
13116 3 5
13117 4 6
13119 An ldr instruction takes 1-3 cycles, but does not block the
13120 pipeline.
13122 NREGS CYCLES
13123 1 1-3
13124 2 2-6
13125 3 3-9
13126 4 4-12
13128 Best case ldr will always win. However, the more ldr instructions
13129 we issue, the less likely we are to be able to schedule them well.
13130 Using ldr instructions also increases code size.
13132 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13133 for counts of 3 or 4 regs. */
13134 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13135 return false;
13136 return true;
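/* Worked example of the XScale numbers above: for two registers an ldm
   costs 2 + 2 = 4 cycles and blocks the pipeline, while two ldr
   instructions cost 2-6 cycles and do not block, which is why counts of
   one or two registers fall back to ldr unless we are optimizing for
   size.  */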
13139 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13140 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13141 an array ORDER which describes the sequence to use when accessing the
13142 offsets that produces an ascending order. In this sequence, each
13143 offset must be larger by exactly 4 than the previous one. ORDER[0]
13144 must have been filled in with the lowest offset by the caller.
13145 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13146 we use to verify that ORDER produces an ascending order of registers.
13147 Return true if it was possible to construct such an order, false if
13148 not. */
13150 static bool
13151 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13152 int *unsorted_regs)
13154 int i;
13155 for (i = 1; i < nops; i++)
13157 int j;
13159 order[i] = order[i - 1];
13160 for (j = 0; j < nops; j++)
13161 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13163 /* We must find exactly one offset that is higher than the
13164 previous one by 4. */
13165 if (order[i] != order[i - 1])
13166 return false;
13167 order[i] = j;
13169 if (order[i] == order[i - 1])
13170 return false;
13171 /* The register numbers must be ascending. */
13172 if (unsorted_regs != NULL
13173 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13174 return false;
13176 return true;
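/* Usage sketch (hypothetical, for illustration only): with unsorted offsets
   { 8, 0, 4, 12 } and ORDER[0] preset by the caller to 1 (the index of the
   lowest offset), the loop above fills ORDER with { 1, 2, 0, 3 }, i.e. the
   accesses visited in the ascending stride-4 sequence 0, 4, 8, 12; any gap
   or duplicate offset makes the search fail and return false.  */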
13179 /* Used to determine in a peephole whether a sequence of load
13180 instructions can be changed into a load-multiple instruction.
13181 NOPS is the number of separate load instructions we are examining. The
13182 first NOPS entries in OPERANDS are the destination registers, the
13183 next NOPS entries are memory operands. If this function is
13184 successful, *BASE is set to the common base register of the memory
13185 accesses; *LOAD_OFFSET is set to the first memory location's offset
13186 from that base register.
13187 REGS is an array filled in with the destination register numbers.
13188 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13189 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13190 the sequence of registers in REGS matches the loads from ascending memory
13191 locations, and the function verifies that the register numbers are
13192 themselves ascending. If CHECK_REGS is false, the register numbers
13193 are stored in the order they are found in the operands. */
13194 static int
13195 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13196 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13198 int unsorted_regs[MAX_LDM_STM_OPS];
13199 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13200 int order[MAX_LDM_STM_OPS];
13201 rtx base_reg_rtx = NULL;
13202 int base_reg = -1;
13203 int i, ldm_case;
13205 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13206 easily extended if required. */
13207 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13209 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13211 /* Loop over the operands and check that the memory references are
13212 suitable (i.e. immediate offsets from the same base register). At
13213 the same time, extract the target register, and the memory
13214 offsets. */
13215 for (i = 0; i < nops; i++)
13217 rtx reg;
13218 rtx offset;
13220 /* Convert a subreg of a mem into the mem itself. */
13221 if (GET_CODE (operands[nops + i]) == SUBREG)
13222 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13224 gcc_assert (MEM_P (operands[nops + i]));
13226 /* Don't reorder volatile memory references; it doesn't seem worth
13227 looking for the case where the order is ok anyway. */
13228 if (MEM_VOLATILE_P (operands[nops + i]))
13229 return 0;
13231 offset = const0_rtx;
13233 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13234 || (GET_CODE (reg) == SUBREG
13235 && REG_P (reg = SUBREG_REG (reg))))
13236 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13237 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13238 || (GET_CODE (reg) == SUBREG
13239 && REG_P (reg = SUBREG_REG (reg))))
13240 && (CONST_INT_P (offset
13241 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13243 if (i == 0)
13245 base_reg = REGNO (reg);
13246 base_reg_rtx = reg;
13247 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13248 return 0;
13250 else if (base_reg != (int) REGNO (reg))
13251 /* Not addressed from the same base register. */
13252 return 0;
13254 unsorted_regs[i] = (REG_P (operands[i])
13255 ? REGNO (operands[i])
13256 : REGNO (SUBREG_REG (operands[i])));
13258 /* If it isn't an integer register, or if it overwrites the
13259 base register but isn't the last insn in the list, then
13260 we can't do this. */
13261 if (unsorted_regs[i] < 0
13262 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13263 || unsorted_regs[i] > 14
13264 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13265 return 0;
13267 /* Don't allow SP to be loaded unless it is also the base
13268 register. It guarantees that SP is reset correctly when
13269 an LDM instruction is interrupted. Otherwise, we might
13270 end up with a corrupt stack. */
13271 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13272 return 0;
13274 unsorted_offsets[i] = INTVAL (offset);
13275 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13276 order[0] = i;
13278 else
13279 /* Not a suitable memory address. */
13280 return 0;
13283 /* All the useful information has now been extracted from the
13284 operands into unsorted_regs and unsorted_offsets; additionally,
13285 order[0] has been set to the lowest offset in the list. Sort
13286 the offsets into order, verifying that they are adjacent, and
13287 check that the register numbers are ascending. */
13288 if (!compute_offset_order (nops, unsorted_offsets, order,
13289 check_regs ? unsorted_regs : NULL))
13290 return 0;
13292 if (saved_order)
13293 memcpy (saved_order, order, sizeof order);
13295 if (base)
13297 *base = base_reg;
13299 for (i = 0; i < nops; i++)
13300 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13302 *load_offset = unsorted_offsets[order[0]];
13305 if (TARGET_THUMB1
13306 && !peep2_reg_dead_p (nops, base_reg_rtx))
13307 return 0;
13309 if (unsorted_offsets[order[0]] == 0)
13310 ldm_case = 1; /* ldmia */
13311 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13312 ldm_case = 2; /* ldmib */
13313 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13314 ldm_case = 3; /* ldmda */
13315 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13316 ldm_case = 4; /* ldmdb */
13317 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13318 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13319 ldm_case = 5;
13320 else
13321 return 0;
13323 if (!multiple_operation_profitable_p (false, nops,
13324 ldm_case == 5
13325 ? unsorted_offsets[order[0]] : 0))
13326 return 0;
13328 return ldm_case;
13331 /* Used to determine in a peephole whether a sequence of store instructions can
13332 be changed into a store-multiple instruction.
13333 NOPS is the number of separate store instructions we are examining.
13334 NOPS_TOTAL is the total number of instructions recognized by the peephole
13335 pattern.
13336 The first NOPS entries in OPERANDS are the source registers, the next
13337 NOPS entries are memory operands. If this function is successful, *BASE is
13338 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13339 to the first memory location's offset from that base register. REGS is an
13340 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13341 likewise filled with the corresponding rtx's.
13342 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13343 numbers to an ascending order of stores.
13344 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13345 from ascending memory locations, and the function verifies that the register
13346 numbers are themselves ascending. If CHECK_REGS is false, the register
13347 numbers are stored in the order they are found in the operands. */
13348 static int
13349 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13350 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13351 HOST_WIDE_INT *load_offset, bool check_regs)
13353 int unsorted_regs[MAX_LDM_STM_OPS];
13354 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13355 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13356 int order[MAX_LDM_STM_OPS];
13357 int base_reg = -1;
13358 rtx base_reg_rtx = NULL;
13359 int i, stm_case;
13361 /* Write back of base register is currently only supported for Thumb 1. */
13362 int base_writeback = TARGET_THUMB1;
13364 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13365 easily extended if required. */
13366 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13368 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13370 /* Loop over the operands and check that the memory references are
13371 suitable (i.e. immediate offsets from the same base register). At
13372 the same time, extract the target register, and the memory
13373 offsets. */
13374 for (i = 0; i < nops; i++)
13376 rtx reg;
13377 rtx offset;
13379 /* Convert a subreg of a mem into the mem itself. */
13380 if (GET_CODE (operands[nops + i]) == SUBREG)
13381 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13383 gcc_assert (MEM_P (operands[nops + i]));
13385 /* Don't reorder volatile memory references; it doesn't seem worth
13386 looking for the case where the order is ok anyway. */
13387 if (MEM_VOLATILE_P (operands[nops + i]))
13388 return 0;
13390 offset = const0_rtx;
13392 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13393 || (GET_CODE (reg) == SUBREG
13394 && REG_P (reg = SUBREG_REG (reg))))
13395 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13396 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13397 || (GET_CODE (reg) == SUBREG
13398 && REG_P (reg = SUBREG_REG (reg))))
13399 && (CONST_INT_P (offset
13400 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13402 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13403 ? operands[i] : SUBREG_REG (operands[i]));
13404 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13406 if (i == 0)
13408 base_reg = REGNO (reg);
13409 base_reg_rtx = reg;
13410 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13411 return 0;
13413 else if (base_reg != (int) REGNO (reg))
13414 /* Not addressed from the same base register. */
13415 return 0;
13417 /* If it isn't an integer register, then we can't do this. */
13418 if (unsorted_regs[i] < 0
13419 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13420 /* The effects are unpredictable if the base register is
13421 both updated and stored. */
13422 || (base_writeback && unsorted_regs[i] == base_reg)
13423 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13424 || unsorted_regs[i] > 14)
13425 return 0;
13427 unsorted_offsets[i] = INTVAL (offset);
13428 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13429 order[0] = i;
13431 else
13432 /* Not a suitable memory address. */
13433 return 0;
13436 /* All the useful information has now been extracted from the
13437 operands into unsorted_regs and unsorted_offsets; additionally,
13438 order[0] has been set to the lowest offset in the list. Sort
13439 the offsets into order, verifying that they are adjacent, and
13440 check that the register numbers are ascending. */
13441 if (!compute_offset_order (nops, unsorted_offsets, order,
13442 check_regs ? unsorted_regs : NULL))
13443 return 0;
13445 if (saved_order)
13446 memcpy (saved_order, order, sizeof order);
13448 if (base)
13450 *base = base_reg;
13452 for (i = 0; i < nops; i++)
13454 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13455 if (reg_rtxs)
13456 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13459 *load_offset = unsorted_offsets[order[0]];
13462 if (TARGET_THUMB1
13463 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13464 return 0;
13466 if (unsorted_offsets[order[0]] == 0)
13467 stm_case = 1; /* stmia */
13468 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13469 stm_case = 2; /* stmib */
13470 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13471 stm_case = 3; /* stmda */
13472 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13473 stm_case = 4; /* stmdb */
13474 else
13475 return 0;
13477 if (!multiple_operation_profitable_p (false, nops, 0))
13478 return 0;
13480 return stm_case;
13483 /* Routines for use in generating RTL. */
13485 /* Generate a load-multiple instruction. COUNT is the number of loads in
13486 the instruction; REGS and MEMS are arrays containing the operands.
13487 BASEREG is the base register to be used in addressing the memory operands.
13488 WBACK_OFFSET is nonzero if the instruction should update the base
13489 register. */
13491 static rtx
13492 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13493 HOST_WIDE_INT wback_offset)
13495 int i = 0, j;
13496 rtx result;
13498 if (!multiple_operation_profitable_p (false, count, 0))
13500 rtx seq;
13502 start_sequence ();
13504 for (i = 0; i < count; i++)
13505 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13507 if (wback_offset != 0)
13508 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13510 seq = get_insns ();
13511 end_sequence ();
13513 return seq;
13516 result = gen_rtx_PARALLEL (VOIDmode,
13517 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13518 if (wback_offset != 0)
13520 XVECEXP (result, 0, 0)
13521 = gen_rtx_SET (VOIDmode, basereg,
13522 plus_constant (Pmode, basereg, wback_offset));
13523 i = 1;
13524 count++;
13527 for (j = 0; i < count; i++, j++)
13528 XVECEXP (result, 0, i)
13529 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13531 return result;
13534 /* Generate a store-multiple instruction. COUNT is the number of stores in
13535 the instruction; REGS and MEMS are arrays containing the operands.
13536 BASEREG is the base register to be used in addressing the memory operands.
13537 WBACK_OFFSET is nonzero if the instruction should update the base
13538 register. */
13540 static rtx
13541 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13542 HOST_WIDE_INT wback_offset)
13544 int i = 0, j;
13545 rtx result;
13547 if (GET_CODE (basereg) == PLUS)
13548 basereg = XEXP (basereg, 0);
13550 if (!multiple_operation_profitable_p (false, count, 0))
13552 rtx seq;
13554 start_sequence ();
13556 for (i = 0; i < count; i++)
13557 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13559 if (wback_offset != 0)
13560 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13562 seq = get_insns ();
13563 end_sequence ();
13565 return seq;
13568 result = gen_rtx_PARALLEL (VOIDmode,
13569 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13570 if (wback_offset != 0)
13572 XVECEXP (result, 0, 0)
13573 = gen_rtx_SET (VOIDmode, basereg,
13574 plus_constant (Pmode, basereg, wback_offset));
13575 i = 1;
13576 count++;
13579 for (j = 0; i < count; i++, j++)
13580 XVECEXP (result, 0, i)
13581 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13583 return result;
13586 /* Generate either a load-multiple or a store-multiple instruction. This
13587 function can be used in situations where we can start with a single MEM
13588 rtx and adjust its address upwards.
13589 COUNT is the number of operations in the instruction, not counting a
13590 possible update of the base register. REGS is an array containing the
13591 register operands.
13592 BASEREG is the base register to be used in addressing the memory operands,
13593 which are constructed from BASEMEM.
13594 WRITE_BACK specifies whether the generated instruction should include an
13595 update of the base register.
13596 OFFSETP is used to pass an offset to and from this function; this offset
13597 is not used when constructing the address (instead BASEMEM should have an
13598 appropriate offset in its address), it is used only for setting
13599 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
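/* Illustrative sketch: with COUNT == 3 the memory operands constructed
   below sit at BASEREG + 0, + 4 and + 8, and when WRITE_BACK is set the
   base register is advanced by 4 * COUNT == 12.  */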
13601 static rtx
13602 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13603 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13605 rtx mems[MAX_LDM_STM_OPS];
13606 HOST_WIDE_INT offset = *offsetp;
13607 int i;
13609 gcc_assert (count <= MAX_LDM_STM_OPS);
13611 if (GET_CODE (basereg) == PLUS)
13612 basereg = XEXP (basereg, 0);
13614 for (i = 0; i < count; i++)
13616 rtx addr = plus_constant (Pmode, basereg, i * 4);
13617 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13618 offset += 4;
13621 if (write_back)
13622 *offsetp = offset;
13624 if (is_load)
13625 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13626 write_back ? 4 * count : 0);
13627 else
13628 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13629 write_back ? 4 * count : 0);
13632 rtx
13633 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13634 rtx basemem, HOST_WIDE_INT *offsetp)
13636 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13637 offsetp);
13640 rtx
13641 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13642 rtx basemem, HOST_WIDE_INT *offsetp)
13644 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13645 offsetp);
13648 /* Called from a peephole2 expander to turn a sequence of loads into an
13649 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13650 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13651 is true if we can reorder the registers because they are used commutatively
13652 subsequently.
13653 Returns true iff we could generate a new instruction. */
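/* Illustrative note: when load_multiple_sequence returns ldm_case 5
   (offsets that fit none of the ia/ib/da/db forms directly), the code
   below first adds OFFSET into a scratch base -- the first destination
   register on ARM/Thumb-2, or the original base on Thumb-1 -- and then
   emits a zero-offset load-multiple.  */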
13655 bool
13656 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13658 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13659 rtx mems[MAX_LDM_STM_OPS];
13660 int i, j, base_reg;
13661 rtx base_reg_rtx;
13662 HOST_WIDE_INT offset;
13663 int write_back = FALSE;
13664 int ldm_case;
13665 rtx addr;
13667 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13668 &base_reg, &offset, !sort_regs);
13670 if (ldm_case == 0)
13671 return false;
13673 if (sort_regs)
13674 for (i = 0; i < nops - 1; i++)
13675 for (j = i + 1; j < nops; j++)
13676 if (regs[i] > regs[j])
13678 int t = regs[i];
13679 regs[i] = regs[j];
13680 regs[j] = t;
13682 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13684 if (TARGET_THUMB1)
13686 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13687 gcc_assert (ldm_case == 1 || ldm_case == 5);
13688 write_back = TRUE;
13691 if (ldm_case == 5)
13693 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13694 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13695 offset = 0;
13696 if (!TARGET_THUMB1)
13698 base_reg = regs[0];
13699 base_reg_rtx = newbase;
13703 for (i = 0; i < nops; i++)
13705 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13706 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13707 SImode, addr, 0);
13709 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13710 write_back ? offset + i * 4 : 0));
13711 return true;
13714 /* Called from a peephole2 expander to turn a sequence of stores into an
13715 STM instruction. OPERANDS are the operands found by the peephole matcher;
13716 NOPS indicates how many separate stores we are trying to combine.
13717 Returns true iff we could generate a new instruction. */
13719 bool
13720 gen_stm_seq (rtx *operands, int nops)
13722 int i;
13723 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13724 rtx mems[MAX_LDM_STM_OPS];
13725 int base_reg;
13726 rtx base_reg_rtx;
13727 HOST_WIDE_INT offset;
13728 int write_back = FALSE;
13729 int stm_case;
13730 rtx addr;
13731 bool base_reg_dies;
13733 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13734 mem_order, &base_reg, &offset, true);
13736 if (stm_case == 0)
13737 return false;
13739 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13741 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13742 if (TARGET_THUMB1)
13744 gcc_assert (base_reg_dies);
13745 write_back = TRUE;
13748 if (stm_case == 5)
13750 gcc_assert (base_reg_dies);
13751 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13752 offset = 0;
13755 addr = plus_constant (Pmode, base_reg_rtx, offset);
13757 for (i = 0; i < nops; i++)
13759 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13760 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13761 SImode, addr, 0);
13763 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13764 write_back ? offset + i * 4 : 0));
13765 return true;
13768 /* Called from a peephole2 expander to turn a sequence of stores that are
13769 preceded by constant loads into an STM instruction. OPERANDS are the
13770 operands found by the peephole matcher; NOPS indicates how many
13771 separate stores we are trying to combine; there are 2 * NOPS
13772 instructions in the peephole.
13773 Returns true iff we could generate a new instruction. */
13775 bool
13776 gen_const_stm_seq (rtx *operands, int nops)
13778 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13779 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13780 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13781 rtx mems[MAX_LDM_STM_OPS];
13782 int base_reg;
13783 rtx base_reg_rtx;
13784 HOST_WIDE_INT offset;
13785 int write_back = FALSE;
13786 int stm_case;
13787 rtx addr;
13788 bool base_reg_dies;
13789 int i, j;
13790 HARD_REG_SET allocated;
13792 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13793 mem_order, &base_reg, &offset, false);
13795 if (stm_case == 0)
13796 return false;
13798 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13800 /* If the same register is used more than once, try to find a free
13801 register. */
13802 CLEAR_HARD_REG_SET (allocated);
13803 for (i = 0; i < nops; i++)
13805 for (j = i + 1; j < nops; j++)
13806 if (regs[i] == regs[j])
13808 rtx t = peep2_find_free_register (0, nops * 2,
13809 TARGET_THUMB1 ? "l" : "r",
13810 SImode, &allocated);
13811 if (t == NULL_RTX)
13812 return false;
13813 reg_rtxs[i] = t;
13814 regs[i] = REGNO (t);
13818 /* Compute an ordering that maps the register numbers to an ascending
13819 sequence. */
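/* Illustrative example (assumed values): regs == {3, 1, 2} yields
   reg_order == {1, 2, 0}, i.e. the operand indices listed in ascending
   register-number order.  */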
13820 reg_order[0] = 0;
13821 for (i = 0; i < nops; i++)
13822 if (regs[i] < regs[reg_order[0]])
13823 reg_order[0] = i;
13825 for (i = 1; i < nops; i++)
13827 int this_order = reg_order[i - 1];
13828 for (j = 0; j < nops; j++)
13829 if (regs[j] > regs[reg_order[i - 1]]
13830 && (this_order == reg_order[i - 1]
13831 || regs[j] < regs[this_order]))
13832 this_order = j;
13833 reg_order[i] = this_order;
13836 /* Ensure that registers that must be live after the instruction end
13837 up with the correct value. */
13838 for (i = 0; i < nops; i++)
13840 int this_order = reg_order[i];
13841 if ((this_order != mem_order[i]
13842 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13843 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13844 return false;
13847 /* Load the constants. */
13848 for (i = 0; i < nops; i++)
13850 rtx op = operands[2 * nops + mem_order[i]];
13851 sorted_regs[i] = regs[reg_order[i]];
13852 emit_move_insn (reg_rtxs[reg_order[i]], op);
13855 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13857 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13858 if (TARGET_THUMB1)
13860 gcc_assert (base_reg_dies);
13861 write_back = TRUE;
13864 if (stm_case == 5)
13866 gcc_assert (base_reg_dies);
13867 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13868 offset = 0;
13871 addr = plus_constant (Pmode, base_reg_rtx, offset);
13873 for (i = 0; i < nops; i++)
13875 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13876 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13877 SImode, addr, 0);
13879 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13880 write_back ? offset + i * 4 : 0));
13881 return true;
13884 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13885 unaligned copies on processors which support unaligned semantics for those
13886 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13887 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13888 An interleave factor of 1 (the minimum) will perform no interleaving.
13889 Load/store multiple are used for aligned addresses where possible. */
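/* Worked example, as a sketch: LENGTH == 11 with INTERLEAVE_FACTOR == 2
   copies one 8-byte chunk (two word loads then two word stores, or
   ldm/stm on whichever side is word aligned), leaving 3 bytes that are
   finished off with one halfword and one byte copy.  */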
13891 static void
13892 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13893 HOST_WIDE_INT length,
13894 unsigned int interleave_factor)
13896 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13897 int *regnos = XALLOCAVEC (int, interleave_factor);
13898 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13899 HOST_WIDE_INT i, j;
13900 HOST_WIDE_INT remaining = length, words;
13901 rtx halfword_tmp = NULL, byte_tmp = NULL;
13902 rtx dst, src;
13903 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13904 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13905 HOST_WIDE_INT srcoffset, dstoffset;
13906 HOST_WIDE_INT src_autoinc, dst_autoinc;
13907 rtx mem, addr;
13909 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13911 /* Use hard registers if we have aligned source or destination so we can use
13912 load/store multiple with contiguous registers. */
13913 if (dst_aligned || src_aligned)
13914 for (i = 0; i < interleave_factor; i++)
13915 regs[i] = gen_rtx_REG (SImode, i);
13916 else
13917 for (i = 0; i < interleave_factor; i++)
13918 regs[i] = gen_reg_rtx (SImode);
13920 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13921 src = copy_addr_to_reg (XEXP (srcbase, 0));
13923 srcoffset = dstoffset = 0;
13925 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13926 For copying the last bytes we want to subtract this offset again. */
13927 src_autoinc = dst_autoinc = 0;
13929 for (i = 0; i < interleave_factor; i++)
13930 regnos[i] = i;
13932 /* Copy BLOCK_SIZE_BYTES chunks. */
13934 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13936 /* Load words. */
13937 if (src_aligned && interleave_factor > 1)
13939 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13940 TRUE, srcbase, &srcoffset));
13941 src_autoinc += UNITS_PER_WORD * interleave_factor;
13943 else
13945 for (j = 0; j < interleave_factor; j++)
13947 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13948 - src_autoinc));
13949 mem = adjust_automodify_address (srcbase, SImode, addr,
13950 srcoffset + j * UNITS_PER_WORD);
13951 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13953 srcoffset += block_size_bytes;
13956 /* Store words. */
13957 if (dst_aligned && interleave_factor > 1)
13959 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13960 TRUE, dstbase, &dstoffset));
13961 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13963 else
13965 for (j = 0; j < interleave_factor; j++)
13967 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13968 - dst_autoinc));
13969 mem = adjust_automodify_address (dstbase, SImode, addr,
13970 dstoffset + j * UNITS_PER_WORD);
13971 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13973 dstoffset += block_size_bytes;
13976 remaining -= block_size_bytes;
13979 /* Copy any whole words left (note these aren't interleaved with any
13980 subsequent halfword/byte load/stores in the interests of simplicity). */
13982 words = remaining / UNITS_PER_WORD;
13984 gcc_assert (words < interleave_factor);
13986 if (src_aligned && words > 1)
13988 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13989 &srcoffset));
13990 src_autoinc += UNITS_PER_WORD * words;
13992 else
13994 for (j = 0; j < words; j++)
13996 addr = plus_constant (Pmode, src,
13997 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13998 mem = adjust_automodify_address (srcbase, SImode, addr,
13999 srcoffset + j * UNITS_PER_WORD);
14000 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14002 srcoffset += words * UNITS_PER_WORD;
14005 if (dst_aligned && words > 1)
14007 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14008 &dstoffset));
14009 dst_autoinc += words * UNITS_PER_WORD;
14011 else
14013 for (j = 0; j < words; j++)
14015 addr = plus_constant (Pmode, dst,
14016 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14017 mem = adjust_automodify_address (dstbase, SImode, addr,
14018 dstoffset + j * UNITS_PER_WORD);
14019 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14021 dstoffset += words * UNITS_PER_WORD;
14024 remaining -= words * UNITS_PER_WORD;
14026 gcc_assert (remaining < 4);
14028 /* Copy a halfword if necessary. */
14030 if (remaining >= 2)
14032 halfword_tmp = gen_reg_rtx (SImode);
14034 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14035 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14036 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14038 /* Either write out immediately, or delay until we've loaded the last
14039 byte, depending on interleave factor. */
14040 if (interleave_factor == 1)
14042 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14043 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14044 emit_insn (gen_unaligned_storehi (mem,
14045 gen_lowpart (HImode, halfword_tmp)));
14046 halfword_tmp = NULL;
14047 dstoffset += 2;
14050 remaining -= 2;
14051 srcoffset += 2;
14054 gcc_assert (remaining < 2);
14056 /* Copy last byte. */
14058 if ((remaining & 1) != 0)
14060 byte_tmp = gen_reg_rtx (SImode);
14062 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14063 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14064 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14066 if (interleave_factor == 1)
14068 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14069 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14070 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14071 byte_tmp = NULL;
14072 dstoffset++;
14075 remaining--;
14076 srcoffset++;
14079 /* Store last halfword if we haven't done so already. */
14081 if (halfword_tmp)
14083 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14084 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14085 emit_insn (gen_unaligned_storehi (mem,
14086 gen_lowpart (HImode, halfword_tmp)));
14087 dstoffset += 2;
14090 /* Likewise for last byte. */
14092 if (byte_tmp)
14094 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14095 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14096 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14097 dstoffset++;
14100 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14103 /* From mips_adjust_block_mem:
14105 Helper function for doing a loop-based block operation on memory
14106 reference MEM. Each iteration of the loop will operate on LENGTH
14107 bytes of MEM.
14109 Create a new base register for use within the loop and point it to
14110 the start of MEM. Create a new memory reference that uses this
14111 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14113 static void
14114 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14115 rtx *loop_mem)
14117 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14119 /* Although the new mem does not refer to a known location,
14120 it does keep up to LENGTH bytes of alignment. */
14121 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14122 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14125 /* From mips_block_move_loop:
14127 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14128 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14129 the memory regions do not overlap. */
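/* Worked example, as a sketch: LENGTH == 40 with BYTES_PER_ITER == 16
   runs the loop twice (32 bytes) and then hands the remaining 8 bytes
   to arm_block_move_unaligned_straight.  */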
14131 static void
14132 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14133 unsigned int interleave_factor,
14134 HOST_WIDE_INT bytes_per_iter)
14136 rtx label, src_reg, dest_reg, final_src, test;
14137 HOST_WIDE_INT leftover;
14139 leftover = length % bytes_per_iter;
14140 length -= leftover;
14142 /* Create registers and memory references for use within the loop. */
14143 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14144 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14146 /* Calculate the value that SRC_REG should have after the last iteration of
14147 the loop. */
14148 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14149 0, 0, OPTAB_WIDEN);
14151 /* Emit the start of the loop. */
14152 label = gen_label_rtx ();
14153 emit_label (label);
14155 /* Emit the loop body. */
14156 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14157 interleave_factor);
14159 /* Move on to the next block. */
14160 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14161 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14163 /* Emit the loop condition. */
14164 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14165 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14167 /* Mop up any left-over bytes. */
14168 if (leftover)
14169 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14172 /* Emit a block move when either the source or destination is unaligned (not
14173 aligned to a four-byte boundary). This may need further tuning depending on
14174 core type, optimize_size setting, etc. */
14176 static int
14177 arm_movmemqi_unaligned (rtx *operands)
14179 HOST_WIDE_INT length = INTVAL (operands[2]);
14181 if (optimize_size)
14183 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14184 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14185 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14186 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14187 or dst_aligned though: allow more interleaving in those cases since the
14188 resulting code can be smaller. */
14189 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14190 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14192 if (length > 12)
14193 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14194 interleave_factor, bytes_per_iter);
14195 else
14196 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14197 interleave_factor);
14199 else
14201 /* Note that the loop created by arm_block_move_unaligned_loop may be
14202 subject to loop unrolling, which makes tuning this condition a little
14203 redundant. */
14204 if (length > 32)
14205 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14206 else
14207 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14210 return 1;
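/* Illustrative sketch of the expansion below: a 10-byte copy with both
   operands word aligned loads three words, stores the first two words
   back (with base writeback), and finishes with a halfword store taken
   from the low half of the third loaded register.  */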
14213 int
14214 arm_gen_movmemqi (rtx *operands)
14216 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14217 HOST_WIDE_INT srcoffset, dstoffset;
14218 int i;
14219 rtx src, dst, srcbase, dstbase;
14220 rtx part_bytes_reg = NULL;
14221 rtx mem;
14223 if (!CONST_INT_P (operands[2])
14224 || !CONST_INT_P (operands[3])
14225 || INTVAL (operands[2]) > 64)
14226 return 0;
14228 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14229 return arm_movmemqi_unaligned (operands);
14231 if (INTVAL (operands[3]) & 3)
14232 return 0;
14234 dstbase = operands[0];
14235 srcbase = operands[1];
14237 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14238 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14240 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14241 out_words_to_go = INTVAL (operands[2]) / 4;
14242 last_bytes = INTVAL (operands[2]) & 3;
14243 dstoffset = srcoffset = 0;
14245 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14246 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14248 for (i = 0; in_words_to_go >= 2; i+=4)
14250 if (in_words_to_go > 4)
14251 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14252 TRUE, srcbase, &srcoffset));
14253 else
14254 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14255 src, FALSE, srcbase,
14256 &srcoffset));
14258 if (out_words_to_go)
14260 if (out_words_to_go > 4)
14261 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14262 TRUE, dstbase, &dstoffset));
14263 else if (out_words_to_go != 1)
14264 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14265 out_words_to_go, dst,
14266 (last_bytes == 0
14267 ? FALSE : TRUE),
14268 dstbase, &dstoffset));
14269 else
14271 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14272 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14273 if (last_bytes != 0)
14275 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14276 dstoffset += 4;
14281 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14282 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14285 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14286 if (out_words_to_go)
14288 rtx sreg;
14290 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14291 sreg = copy_to_reg (mem);
14293 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14294 emit_move_insn (mem, sreg);
14295 in_words_to_go--;
14297 gcc_assert (!in_words_to_go); /* Sanity check */
14300 if (in_words_to_go)
14302 gcc_assert (in_words_to_go > 0);
14304 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14305 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14308 gcc_assert (!last_bytes || part_bytes_reg);
14310 if (BYTES_BIG_ENDIAN && last_bytes)
14312 rtx tmp = gen_reg_rtx (SImode);
14314 /* The bytes we want are in the top end of the word. */
14315 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14316 GEN_INT (8 * (4 - last_bytes))));
14317 part_bytes_reg = tmp;
14319 while (last_bytes)
14321 mem = adjust_automodify_address (dstbase, QImode,
14322 plus_constant (Pmode, dst,
14323 last_bytes - 1),
14324 dstoffset + last_bytes - 1);
14325 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14327 if (--last_bytes)
14329 tmp = gen_reg_rtx (SImode);
14330 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14331 part_bytes_reg = tmp;
14336 else
14338 if (last_bytes > 1)
14340 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14341 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14342 last_bytes -= 2;
14343 if (last_bytes)
14345 rtx tmp = gen_reg_rtx (SImode);
14346 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14347 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14348 part_bytes_reg = tmp;
14349 dstoffset += 2;
14353 if (last_bytes)
14355 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14356 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14360 return 1;
14363 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14364 by mode size. */
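/* For example (sketch): given an SImode MEM whose address is
   (plus (reg r0) (const_int 8)), the result is an SImode MEM at
   (plus (reg r0) (const_int 12)), i.e. the address is advanced by the
   mode size.  */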
14365 inline static rtx
14366 next_consecutive_mem (rtx mem)
14368 enum machine_mode mode = GET_MODE (mem);
14369 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14370 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14372 return adjust_automodify_address (mem, mode, addr, offset);
14375 /* Copy using LDRD/STRD instructions whenever possible.
14376 Returns true upon success. */
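/* Worked example, as a sketch: LEN == 14 with sufficiently aligned
   operands copies one 8-byte chunk (LDRD/STRD or aligned DImode moves),
   then one word, then one halfword, and returns true.  */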
14377 bool
14378 gen_movmem_ldrd_strd (rtx *operands)
14380 unsigned HOST_WIDE_INT len;
14381 HOST_WIDE_INT align;
14382 rtx src, dst, base;
14383 rtx reg0;
14384 bool src_aligned, dst_aligned;
14385 bool src_volatile, dst_volatile;
14387 gcc_assert (CONST_INT_P (operands[2]));
14388 gcc_assert (CONST_INT_P (operands[3]));
14390 len = UINTVAL (operands[2]);
14391 if (len > 64)
14392 return false;
14394 /* Maximum alignment we can assume for both src and dst buffers. */
14395 align = INTVAL (operands[3]);
14397 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14398 return false;
14400 /* Place src and dst addresses in registers
14401 and update the corresponding mem rtx. */
14402 dst = operands[0];
14403 dst_volatile = MEM_VOLATILE_P (dst);
14404 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14405 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14406 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14408 src = operands[1];
14409 src_volatile = MEM_VOLATILE_P (src);
14410 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14411 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14412 src = adjust_automodify_address (src, VOIDmode, base, 0);
14414 if (!unaligned_access && !(src_aligned && dst_aligned))
14415 return false;
14417 if (src_volatile || dst_volatile)
14418 return false;
14420 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14421 if (!(dst_aligned || src_aligned))
14422 return arm_gen_movmemqi (operands);
14424 src = adjust_address (src, DImode, 0);
14425 dst = adjust_address (dst, DImode, 0);
14426 while (len >= 8)
14428 len -= 8;
14429 reg0 = gen_reg_rtx (DImode);
14430 if (src_aligned)
14431 emit_move_insn (reg0, src);
14432 else
14433 emit_insn (gen_unaligned_loaddi (reg0, src));
14435 if (dst_aligned)
14436 emit_move_insn (dst, reg0);
14437 else
14438 emit_insn (gen_unaligned_storedi (dst, reg0));
14440 src = next_consecutive_mem (src);
14441 dst = next_consecutive_mem (dst);
14444 gcc_assert (len < 8);
14445 if (len >= 4)
14447 /* More than a word but less than a double-word to copy. Copy a word. */
14448 reg0 = gen_reg_rtx (SImode);
14449 src = adjust_address (src, SImode, 0);
14450 dst = adjust_address (dst, SImode, 0);
14451 if (src_aligned)
14452 emit_move_insn (reg0, src);
14453 else
14454 emit_insn (gen_unaligned_loadsi (reg0, src));
14456 if (dst_aligned)
14457 emit_move_insn (dst, reg0);
14458 else
14459 emit_insn (gen_unaligned_storesi (dst, reg0));
14461 src = next_consecutive_mem (src);
14462 dst = next_consecutive_mem (dst);
14463 len -= 4;
14466 if (len == 0)
14467 return true;
14469 /* Copy the remaining bytes. */
14470 if (len >= 2)
14472 dst = adjust_address (dst, HImode, 0);
14473 src = adjust_address (src, HImode, 0);
14474 reg0 = gen_reg_rtx (SImode);
14475 if (src_aligned)
14476 emit_insn (gen_zero_extendhisi2 (reg0, src));
14477 else
14478 emit_insn (gen_unaligned_loadhiu (reg0, src));
14480 if (dst_aligned)
14481 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14482 else
14483 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14485 src = next_consecutive_mem (src);
14486 dst = next_consecutive_mem (dst);
14487 if (len == 2)
14488 return true;
14491 dst = adjust_address (dst, QImode, 0);
14492 src = adjust_address (src, QImode, 0);
14493 reg0 = gen_reg_rtx (QImode);
14494 emit_move_insn (reg0, src);
14495 emit_move_insn (dst, reg0);
14496 return true;
14499 /* Select a dominance comparison mode if possible for a test of the general
14500 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14501 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14502 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14503 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14504 In all cases OP will be either EQ or NE, but we don't need to know which
14505 here. If we are unable to support a dominance comparison we return
14506 CC mode. This will then fail to match for the RTL expressions that
14507 generate this call. */
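/* Illustrative examples: two EQ comparisons combined under
   DOM_CC_X_AND_Y give CC_DEQmode; an LT combined with an LE under
   DOM_CC_X_OR_Y gives CC_DLEmode, since LT dominates LE.  */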
14508 enum machine_mode
14509 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14511 enum rtx_code cond1, cond2;
14512 int swapped = 0;
14514 /* Currently we will probably get the wrong result if the individual
14515 comparisons are not simple. This also ensures that it is safe to
14516 reverse a comparison if necessary. */
14517 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14518 != CCmode)
14519 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14520 != CCmode))
14521 return CCmode;
14523 /* The if_then_else variant of this tests the second condition if the
14524 first passes, but is true if the first fails. Reverse the first
14525 condition to get a true "inclusive-or" expression. */
14526 if (cond_or == DOM_CC_NX_OR_Y)
14527 cond1 = reverse_condition (cond1);
14529 /* If the comparisons are not equal, and one doesn't dominate the other,
14530 then we can't do this. */
14531 if (cond1 != cond2
14532 && !comparison_dominates_p (cond1, cond2)
14533 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14534 return CCmode;
14536 if (swapped)
14538 enum rtx_code temp = cond1;
14539 cond1 = cond2;
14540 cond2 = temp;
14543 switch (cond1)
14545 case EQ:
14546 if (cond_or == DOM_CC_X_AND_Y)
14547 return CC_DEQmode;
14549 switch (cond2)
14551 case EQ: return CC_DEQmode;
14552 case LE: return CC_DLEmode;
14553 case LEU: return CC_DLEUmode;
14554 case GE: return CC_DGEmode;
14555 case GEU: return CC_DGEUmode;
14556 default: gcc_unreachable ();
14559 case LT:
14560 if (cond_or == DOM_CC_X_AND_Y)
14561 return CC_DLTmode;
14563 switch (cond2)
14565 case LT:
14566 return CC_DLTmode;
14567 case LE:
14568 return CC_DLEmode;
14569 case NE:
14570 return CC_DNEmode;
14571 default:
14572 gcc_unreachable ();
14575 case GT:
14576 if (cond_or == DOM_CC_X_AND_Y)
14577 return CC_DGTmode;
14579 switch (cond2)
14581 case GT:
14582 return CC_DGTmode;
14583 case GE:
14584 return CC_DGEmode;
14585 case NE:
14586 return CC_DNEmode;
14587 default:
14588 gcc_unreachable ();
14591 case LTU:
14592 if (cond_or == DOM_CC_X_AND_Y)
14593 return CC_DLTUmode;
14595 switch (cond2)
14597 case LTU:
14598 return CC_DLTUmode;
14599 case LEU:
14600 return CC_DLEUmode;
14601 case NE:
14602 return CC_DNEmode;
14603 default:
14604 gcc_unreachable ();
14607 case GTU:
14608 if (cond_or == DOM_CC_X_AND_Y)
14609 return CC_DGTUmode;
14611 switch (cond2)
14613 case GTU:
14614 return CC_DGTUmode;
14615 case GEU:
14616 return CC_DGEUmode;
14617 case NE:
14618 return CC_DNEmode;
14619 default:
14620 gcc_unreachable ();
14623 /* The remaining cases only occur when both comparisons are the
14624 same. */
14625 case NE:
14626 gcc_assert (cond1 == cond2);
14627 return CC_DNEmode;
14629 case LE:
14630 gcc_assert (cond1 == cond2);
14631 return CC_DLEmode;
14633 case GE:
14634 gcc_assert (cond1 == cond2);
14635 return CC_DGEmode;
14637 case LEU:
14638 gcc_assert (cond1 == cond2);
14639 return CC_DLEUmode;
14641 case GEU:
14642 gcc_assert (cond1 == cond2);
14643 return CC_DGEUmode;
14645 default:
14646 gcc_unreachable ();
14650 enum machine_mode
14651 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14653 /* All floating point compares return CCFP if it is an equality
14654 comparison, and CCFPE otherwise. */
14655 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14657 switch (op)
14659 case EQ:
14660 case NE:
14661 case UNORDERED:
14662 case ORDERED:
14663 case UNLT:
14664 case UNLE:
14665 case UNGT:
14666 case UNGE:
14667 case UNEQ:
14668 case LTGT:
14669 return CCFPmode;
14671 case LT:
14672 case LE:
14673 case GT:
14674 case GE:
14675 return CCFPEmode;
14677 default:
14678 gcc_unreachable ();
14682 /* A compare with a shifted operand. Because of canonicalization, the
14683 comparison will have to be swapped when we emit the assembler. */
14684 if (GET_MODE (y) == SImode
14685 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14686 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14687 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14688 || GET_CODE (x) == ROTATERT))
14689 return CC_SWPmode;
14691 /* This operation is performed swapped, but since we only rely on the Z
14692 flag we don't need an additional mode. */
14693 if (GET_MODE (y) == SImode
14694 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14695 && GET_CODE (x) == NEG
14696 && (op == EQ || op == NE))
14697 return CC_Zmode;
14699 /* This is a special case that is used by combine to allow a
14700 comparison of a shifted byte load to be split into a zero-extend
14701 followed by a comparison of the shifted integer (only valid for
14702 equalities and unsigned inequalities). */
14703 if (GET_MODE (x) == SImode
14704 && GET_CODE (x) == ASHIFT
14705 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14706 && GET_CODE (XEXP (x, 0)) == SUBREG
14707 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14708 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14709 && (op == EQ || op == NE
14710 || op == GEU || op == GTU || op == LTU || op == LEU)
14711 && CONST_INT_P (y))
14712 return CC_Zmode;
14714 /* A construct for a conditional compare, if the false arm contains
14715 0, then both conditions must be true, otherwise either condition
14716 must be true. Not all conditions are possible, so CCmode is
14717 returned if it can't be done. */
14718 if (GET_CODE (x) == IF_THEN_ELSE
14719 && (XEXP (x, 2) == const0_rtx
14720 || XEXP (x, 2) == const1_rtx)
14721 && COMPARISON_P (XEXP (x, 0))
14722 && COMPARISON_P (XEXP (x, 1)))
14723 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14724 INTVAL (XEXP (x, 2)));
14726 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14727 if (GET_CODE (x) == AND
14728 && (op == EQ || op == NE)
14729 && COMPARISON_P (XEXP (x, 0))
14730 && COMPARISON_P (XEXP (x, 1)))
14731 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14732 DOM_CC_X_AND_Y);
14734 if (GET_CODE (x) == IOR
14735 && (op == EQ || op == NE)
14736 && COMPARISON_P (XEXP (x, 0))
14737 && COMPARISON_P (XEXP (x, 1)))
14738 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14739 DOM_CC_X_OR_Y);
14741 /* An operation (on Thumb) where we want to test for a single bit.
14742 This is done by shifting that bit up into the top bit of a
14743 scratch register; we can then branch on the sign bit. */
14744 if (TARGET_THUMB1
14745 && GET_MODE (x) == SImode
14746 && (op == EQ || op == NE)
14747 && GET_CODE (x) == ZERO_EXTRACT
14748 && XEXP (x, 1) == const1_rtx)
14749 return CC_Nmode;
14751 /* For an operation that sets the condition codes as a side-effect, the
14752 V flag is not set correctly, so we can only use comparisons where
14753 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14754 instead.) */
14755 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14756 if (GET_MODE (x) == SImode
14757 && y == const0_rtx
14758 && (op == EQ || op == NE || op == LT || op == GE)
14759 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14760 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14761 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14762 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14763 || GET_CODE (x) == LSHIFTRT
14764 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14765 || GET_CODE (x) == ROTATERT
14766 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14767 return CC_NOOVmode;
14769 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14770 return CC_Zmode;
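/* The test below recognizes carry checks of the shape
   (ltu (plus a b) a): the sum is below one of its operands exactly when
   the addition wrapped, so only the C flag is needed (illustrative).  */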
14772 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14773 && GET_CODE (x) == PLUS
14774 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14775 return CC_Cmode;
14777 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14779 switch (op)
14781 case EQ:
14782 case NE:
14783 /* A DImode comparison against zero can be implemented by
14784 or'ing the two halves together. */
14785 if (y == const0_rtx)
14786 return CC_Zmode;
14788 /* We can do an equality test in three Thumb instructions. */
14789 if (!TARGET_32BIT)
14790 return CC_Zmode;
14792 /* FALLTHROUGH */
14794 case LTU:
14795 case LEU:
14796 case GTU:
14797 case GEU:
14798 /* DImode unsigned comparisons can be implemented by cmp +
14799 cmpeq without a scratch register. Not worth doing in
14800 Thumb-2. */
14801 if (TARGET_32BIT)
14802 return CC_CZmode;
14804 /* FALLTHROUGH */
14806 case LT:
14807 case LE:
14808 case GT:
14809 case GE:
14810 /* DImode signed and unsigned comparisons can be implemented
14811 by cmp + sbcs with a scratch register, but that does not
14812 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14813 gcc_assert (op != EQ && op != NE);
14814 return CC_NCVmode;
14816 default:
14817 gcc_unreachable ();
14821 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14822 return GET_MODE (x);
14824 return CCmode;
14827 /* X and Y are two things to compare using CODE. Emit the compare insn and
14828 return the rtx for register 0 in the proper mode. FP means this is a
14829 floating point compare: I don't think that it is needed on the arm. */
14830 rtx
14831 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14833 enum machine_mode mode;
14834 rtx cc_reg;
14835 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14837 /* We might have X as a constant, Y as a register because of the predicates
14838 used for cmpdi. If so, force X to a register here. */
14839 if (dimode_comparison && !REG_P (x))
14840 x = force_reg (DImode, x);
14842 mode = SELECT_CC_MODE (code, x, y);
14843 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14845 if (dimode_comparison
14846 && mode != CC_CZmode)
14848 rtx clobber, set;
14850 /* To compare two non-zero values for equality, XOR them and
14851 then compare against zero. Not used for ARM mode; there
14852 CC_CZmode is cheaper. */
14853 if (mode == CC_Zmode && y != const0_rtx)
14855 gcc_assert (!reload_completed);
14856 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14857 y = const0_rtx;
14860 /* A scratch register is required. */
14861 if (reload_completed)
14862 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14863 else
14864 scratch = gen_rtx_SCRATCH (SImode);
14866 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14867 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
14868 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14870 else
14871 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14873 return cc_reg;
14876 /* Generate a sequence of insns that will generate the correct return
14877 address mask depending on the physical architecture that the program
14878 is running on. */
14879 rtx
14880 arm_gen_return_addr_mask (void)
14882 rtx reg = gen_reg_rtx (Pmode);
14884 emit_insn (gen_return_addr_mask (reg));
14885 return reg;
14888 void
14889 arm_reload_in_hi (rtx *operands)
14891 rtx ref = operands[1];
14892 rtx base, scratch;
14893 HOST_WIDE_INT offset = 0;
14895 if (GET_CODE (ref) == SUBREG)
14897 offset = SUBREG_BYTE (ref);
14898 ref = SUBREG_REG (ref);
14901 if (REG_P (ref))
14903 /* We have a pseudo which has been spilt onto the stack; there
14904 are two cases here: the first where there is a simple
14905 stack-slot replacement and a second where the stack-slot is
14906 out of range, or is used as a subreg. */
14907 if (reg_equiv_mem (REGNO (ref)))
14909 ref = reg_equiv_mem (REGNO (ref));
14910 base = find_replacement (&XEXP (ref, 0));
14912 else
14913 /* The slot is out of range, or was dressed up in a SUBREG. */
14914 base = reg_equiv_address (REGNO (ref));
14916 else
14917 base = find_replacement (&XEXP (ref, 0));
14919 /* Handle the case where the address is too complex to be offset by 1. */
14920 if (GET_CODE (base) == MINUS
14921 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14923 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14925 emit_set_insn (base_plus, base);
14926 base = base_plus;
14928 else if (GET_CODE (base) == PLUS)
14930 /* The addend must be CONST_INT, or we would have dealt with it above. */
14931 HOST_WIDE_INT hi, lo;
14933 offset += INTVAL (XEXP (base, 1));
14934 base = XEXP (base, 0);
14936 /* Rework the address into a legal sequence of insns. */
14937 /* Valid range for lo is -4095 -> 4095 */
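/* Illustrative splits: offset 0x2345 gives lo == 0x345, hi == 0x2000;
   offset -6 gives lo == -6, hi == 0; offset 4095 is split as lo == 2047,
   hi == 2048 so that the extra +1 used below stays in range.  */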
14938 lo = (offset >= 0
14939 ? (offset & 0xfff)
14940 : -((-offset) & 0xfff));
14942 /* Corner case, if lo is the max offset then we would be out of range
14943 once we have added the additional 1 below, so bump the msb into the
14944 pre-loading insn(s). */
14945 if (lo == 4095)
14946 lo &= 0x7ff;
14948 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14949 ^ (HOST_WIDE_INT) 0x80000000)
14950 - (HOST_WIDE_INT) 0x80000000);
14952 gcc_assert (hi + lo == offset);
14954 if (hi != 0)
14956 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14958 /* Get the base address; addsi3 knows how to handle constants
14959 that require more than one insn. */
14960 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14961 base = base_plus;
14962 offset = lo;
14966 /* Operands[2] may overlap operands[0] (though it won't overlap
14967 operands[1]), that's why we asked for a DImode reg -- so we can
14968 use the bit that does not overlap. */
14969 if (REGNO (operands[2]) == REGNO (operands[0]))
14970 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14971 else
14972 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14974 emit_insn (gen_zero_extendqisi2 (scratch,
14975 gen_rtx_MEM (QImode,
14976 plus_constant (Pmode, base,
14977 offset))));
14978 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14979 gen_rtx_MEM (QImode,
14980 plus_constant (Pmode, base,
14981 offset + 1))));
14982 if (!BYTES_BIG_ENDIAN)
14983 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14984 gen_rtx_IOR (SImode,
14985 gen_rtx_ASHIFT
14986 (SImode,
14987 gen_rtx_SUBREG (SImode, operands[0], 0),
14988 GEN_INT (8)),
14989 scratch));
14990 else
14991 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14992 gen_rtx_IOR (SImode,
14993 gen_rtx_ASHIFT (SImode, scratch,
14994 GEN_INT (8)),
14995 gen_rtx_SUBREG (SImode, operands[0], 0)));
14998 /* Handle storing a half-word to memory during reload by synthesizing as two
14999 byte stores. Take care not to clobber the input values until after we
15000 have moved them somewhere safe. This code assumes that if the DImode
15001 scratch in operands[2] overlaps either the input value or output address
15002 in some way, then that value must die in this insn (we absolutely need
15003 two scratch registers for some corner cases). */
15004 void
15005 arm_reload_out_hi (rtx *operands)
15007 rtx ref = operands[0];
15008 rtx outval = operands[1];
15009 rtx base, scratch;
15010 HOST_WIDE_INT offset = 0;
15012 if (GET_CODE (ref) == SUBREG)
15014 offset = SUBREG_BYTE (ref);
15015 ref = SUBREG_REG (ref);
15018 if (REG_P (ref))
15020 /* We have a pseudo which has been spilt onto the stack; there
15021 are two cases here: the first where there is a simple
15022 stack-slot replacement and a second where the stack-slot is
15023 out of range, or is used as a subreg. */
15024 if (reg_equiv_mem (REGNO (ref)))
15026 ref = reg_equiv_mem (REGNO (ref));
15027 base = find_replacement (&XEXP (ref, 0));
15029 else
15030 /* The slot is out of range, or was dressed up in a SUBREG. */
15031 base = reg_equiv_address (REGNO (ref));
15033 else
15034 base = find_replacement (&XEXP (ref, 0));
15036 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15038 /* Handle the case where the address is too complex to be offset by 1. */
15039 if (GET_CODE (base) == MINUS
15040 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15042 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15044 /* Be careful not to destroy OUTVAL. */
15045 if (reg_overlap_mentioned_p (base_plus, outval))
15047 /* Updating base_plus might destroy outval, see if we can
15048 swap the scratch and base_plus. */
15049 if (!reg_overlap_mentioned_p (scratch, outval))
15051 rtx tmp = scratch;
15052 scratch = base_plus;
15053 base_plus = tmp;
15055 else
15057 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15059 /* Be conservative and copy OUTVAL into the scratch now,
15060 this should only be necessary if outval is a subreg
15061 of something larger than a word. */
15062 /* XXX Might this clobber base? I can't see how it can,
15063 since scratch is known to overlap with OUTVAL, and
15064 must be wider than a word. */
15065 emit_insn (gen_movhi (scratch_hi, outval));
15066 outval = scratch_hi;
15070 emit_set_insn (base_plus, base);
15071 base = base_plus;
15073 else if (GET_CODE (base) == PLUS)
15075 /* The addend must be CONST_INT, or we would have dealt with it above. */
15076 HOST_WIDE_INT hi, lo;
15078 offset += INTVAL (XEXP (base, 1));
15079 base = XEXP (base, 0);
15081 /* Rework the address into a legal sequence of insns. */
15082 /* Valid range for lo is -4095 -> 4095 */
15083 lo = (offset >= 0
15084 ? (offset & 0xfff)
15085 : -((-offset) & 0xfff));
15087 /* Corner case, if lo is the max offset then we would be out of range
15088 once we have added the additional 1 below, so bump the msb into the
15089 pre-loading insn(s). */
15090 if (lo == 4095)
15091 lo &= 0x7ff;
15093 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15094 ^ (HOST_WIDE_INT) 0x80000000)
15095 - (HOST_WIDE_INT) 0x80000000);
15097 gcc_assert (hi + lo == offset);
15099 if (hi != 0)
15101 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15103 /* Be careful not to destroy OUTVAL. */
15104 if (reg_overlap_mentioned_p (base_plus, outval))
15106 /* Updating base_plus might destroy outval, see if we
15107 can swap the scratch and base_plus. */
15108 if (!reg_overlap_mentioned_p (scratch, outval))
15110 rtx tmp = scratch;
15111 scratch = base_plus;
15112 base_plus = tmp;
15114 else
15116 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15118 /* Be conservative and copy outval into scratch now,
15119 this should only be necessary if outval is a
15120 subreg of something larger than a word. */
15121 /* XXX Might this clobber base? I can't see how it
15122 can, since scratch is known to overlap with
15123 outval. */
15124 emit_insn (gen_movhi (scratch_hi, outval));
15125 outval = scratch_hi;
15129 /* Get the base address; addsi3 knows how to handle constants
15130 that require more than one insn. */
15131 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15132 base = base_plus;
15133 offset = lo;
15137 if (BYTES_BIG_ENDIAN)
15139 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15140 plus_constant (Pmode, base,
15141 offset + 1)),
15142 gen_lowpart (QImode, outval)));
15143 emit_insn (gen_lshrsi3 (scratch,
15144 gen_rtx_SUBREG (SImode, outval, 0),
15145 GEN_INT (8)));
15146 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15147 offset)),
15148 gen_lowpart (QImode, scratch)));
15150 else
15152 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15153 offset)),
15154 gen_lowpart (QImode, outval)));
15155 emit_insn (gen_lshrsi3 (scratch,
15156 gen_rtx_SUBREG (SImode, outval, 0),
15157 GEN_INT (8)));
15158 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15159 plus_constant (Pmode, base,
15160 offset + 1)),
15161 gen_lowpart (QImode, scratch)));
15165 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15166 (padded to the size of a word) should be passed in a register. */
15168 static bool
15169 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15171 if (TARGET_AAPCS_BASED)
15172 return must_pass_in_stack_var_size (mode, type);
15173 else
15174 return must_pass_in_stack_var_size_or_pad (mode, type);
15178 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15179 Return true if an argument passed on the stack should be padded upwards,
15180 i.e. if the least-significant byte has useful data.
15181 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15182 aggregate types are placed in the lowest memory address. */
15184 bool
15185 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15187 if (!TARGET_AAPCS_BASED)
15188 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15190 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15191 return false;
15193 return true;
15197 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15198 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15199 register has useful data, and return the opposite if the most
15200 significant byte does. */
15202 bool
15203 arm_pad_reg_upward (enum machine_mode mode,
15204 tree type, int first ATTRIBUTE_UNUSED)
15206 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15208 /* For AAPCS, small aggregates, small fixed-point types,
15209 and small complex types are always padded upwards. */
15210 if (type)
15212 if ((AGGREGATE_TYPE_P (type)
15213 || TREE_CODE (type) == COMPLEX_TYPE
15214 || FIXED_POINT_TYPE_P (type))
15215 && int_size_in_bytes (type) <= 4)
15216 return true;
15218 else
15220 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15221 && GET_MODE_SIZE (mode) <= 4)
15222 return true;
15226 /* Otherwise, use default padding. */
15227 return !BYTES_BIG_ENDIAN;
15230 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15231 assuming that the address in the base register is word aligned. */
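/* Illustrative cases: 256 is rejected in ARM state (limit 255); 1020 is
   accepted in Thumb-2 state, but 1022 is rejected there because it is
   not a multiple of 4.  */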
15232 bool
15233 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15235 HOST_WIDE_INT max_offset;
15237 /* Offset must be a multiple of 4 in Thumb mode. */
15238 if (TARGET_THUMB2 && ((offset & 3) != 0))
15239 return false;
15241 if (TARGET_THUMB2)
15242 max_offset = 1020;
15243 else if (TARGET_ARM)
15244 max_offset = 255;
15245 else
15246 return false;
15248 return ((offset <= max_offset) && (offset >= -max_offset));
15251 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15252 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15253 Assumes that the address in the base register RN is word aligned. Pattern
15254 guarantees that both memory accesses use the same base register,
15255 the offsets are constants within the range, and the gap between the offsets is 4.
15256 If reload is complete, then check that the registers are legal. WBACK indicates whether
15257 address is updated. LOAD indicates whether memory access is load or store. */
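/* Illustrative cases for ARM state: the pair (r4, r5) with an in-range
   offset is accepted, while (r5, r6) is rejected because the first
   register is odd, and (r4, r6) because the registers are not
   consecutive.  In Thumb-2 state a load with RT == RT2 is rejected.  */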
15258 bool
15259 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15260 bool wback, bool load)
15262 unsigned int t, t2, n;
15264 if (!reload_completed)
15265 return true;
15267 if (!offset_ok_for_ldrd_strd (offset))
15268 return false;
15270 t = REGNO (rt);
15271 t2 = REGNO (rt2);
15272 n = REGNO (rn);
15274 if ((TARGET_THUMB2)
15275 && ((wback && (n == t || n == t2))
15276 || (t == SP_REGNUM)
15277 || (t == PC_REGNUM)
15278 || (t2 == SP_REGNUM)
15279 || (t2 == PC_REGNUM)
15280 || (!load && (n == PC_REGNUM))
15281 || (load && (t == t2))
15282 /* Triggers Cortex-M3 LDRD errata. */
15283 || (!wback && load && fix_cm3_ldrd && (n == t))))
15284 return false;
15286 if ((TARGET_ARM)
15287 && ((wback && (n == t || n == t2))
15288 || (t2 == PC_REGNUM)
15289 || (t % 2 != 0) /* First destination register is not even. */
15290 || (t2 != t + 1)
15291 /* PC can be used as base register (for offset addressing only),
15292 but it is deprecated. */
15293 || (n == PC_REGNUM)))
15294 return false;
15296 return true;
15299 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15300 operand MEM's address contains an immediate offset from the base
15301 register and has no side effects, in which case it sets BASE and
15302 OFFSET accordingly. */
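/* For example (sketch): (mem (plus (reg r3) (const_int 8))) yields
   *BASE == r3 and *OFFSET == 8, while (mem (reg r3)) yields *BASE == r3
   and *OFFSET == 0.  Auto-modified addresses fail the side_effects_p
   check.  */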
15303 static bool
15304 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15306 rtx addr;
15308 gcc_assert (base != NULL && offset != NULL);
15310 /* TODO: Handle more general memory operand patterns, such as
15311 PRE_DEC and PRE_INC. */
15313 if (side_effects_p (mem))
15314 return false;
15316 /* Can't deal with subregs. */
15317 if (GET_CODE (mem) == SUBREG)
15318 return false;
15320 gcc_assert (MEM_P (mem));
15322 *offset = const0_rtx;
15324 addr = XEXP (mem, 0);
15326 /* If addr isn't valid for DImode, then we can't handle it. */
15327 if (!arm_legitimate_address_p (DImode, addr,
15328 reload_in_progress || reload_completed))
15329 return false;
15331 if (REG_P (addr))
15333 *base = addr;
15334 return true;
15336 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15338 *base = XEXP (addr, 0);
15339 *offset = XEXP (addr, 1);
15340 return (REG_P (*base) && CONST_INT_P (*offset));
15343 return false;
15346 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15348 /* Called from a peephole2 to replace two word-size accesses with a
15349 single LDRD/STRD instruction. Returns true iff we can generate a
15350 new instruction sequence. That is, both accesses use the same base
15351 register and the gap between constant offsets is 4. This function
15352 may reorder its operands to match ldrd/strd RTL templates.
15353 OPERANDS are the operands found by the peephole matcher;
15354 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15355 corresponding memory operands. LOAD indicates whether the access
15356 is load or store. CONST_STORE indicates a store of constant
15357 integer values held in OPERANDS[4,5] and assumes that the pattern
15358 is four insns long, for the purpose of checking dead registers.
15359 COMMUTE indicates that register operands may be reordered. */
15360 bool
15361 gen_operands_ldrd_strd (rtx *operands, bool load,
15362 bool const_store, bool commute)
15364 int nops = 2;
15365 HOST_WIDE_INT offsets[2], offset;
15366 rtx base = NULL_RTX;
15367 rtx cur_base, cur_offset, tmp;
15368 int i, gap;
15369 HARD_REG_SET regset;
15371 gcc_assert (!const_store || !load);
15372 /* Check that the memory references are immediate offsets from the
15373 same base register. Extract the base register, the destination
15374 registers, and the corresponding memory offsets. */
15375 for (i = 0; i < nops; i++)
15377 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15378 return false;
15380 if (i == 0)
15381 base = cur_base;
15382 else if (REGNO (base) != REGNO (cur_base))
15383 return false;
15385 offsets[i] = INTVAL (cur_offset);
15386 if (GET_CODE (operands[i]) == SUBREG)
15388 tmp = SUBREG_REG (operands[i]);
15389 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15390 operands[i] = tmp;
15394 /* Make sure there is no dependency between the individual loads. */
15395 if (load && REGNO (operands[0]) == REGNO (base))
15396 return false; /* RAW */
15398 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15399 return false; /* WAW */
15401 /* If the same input register is used in both stores
15402 when storing different constants, try to find a free register.
15403 For example, the code
15404 mov r0, 0
15405 str r0, [r2]
15406 mov r0, 1
15407 str r0, [r2, #4]
15408 can be transformed into
15409 mov r1, 0
15410 strd r1, r0, [r2]
15411 in Thumb mode assuming that r1 is free. */
15412 if (const_store
15413 && REGNO (operands[0]) == REGNO (operands[1])
15414 && INTVAL (operands[4]) != INTVAL (operands[5]))
15416 if (TARGET_THUMB2)
15418 CLEAR_HARD_REG_SET (regset);
15419 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15420 if (tmp == NULL_RTX)
15421 return false;
15423 /* Use the new register in the first load to ensure that
15424 if the original input register is not dead after peephole,
15425 then it will have the correct constant value. */
15426 operands[0] = tmp;
15428 else if (TARGET_ARM)
15430 return false;
15431 int regno = REGNO (operands[0]);
15432 if (!peep2_reg_dead_p (4, operands[0]))
15434 /* When the input register is even and is not dead after the
15435 pattern, it has to hold the second constant but we cannot
15436 form a legal STRD in ARM mode with this register as the second
15437 register. */
15438 if (regno % 2 == 0)
15439 return false;
15441 /* Is regno-1 free? */
15442 SET_HARD_REG_SET (regset);
15443 CLEAR_HARD_REG_BIT (regset, regno - 1);
15444 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15445 if (tmp == NULL_RTX)
15446 return false;
15448 operands[0] = tmp;
15450 else
15452 /* Find a DImode register. */
15453 CLEAR_HARD_REG_SET (regset);
15454 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15455 if (tmp != NULL_RTX)
15457 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15458 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15460 else
15462 /* Can we use the input register to form a DI register? */
15463 SET_HARD_REG_SET (regset);
15464 CLEAR_HARD_REG_BIT (regset,
15465 regno % 2 == 0 ? regno + 1 : regno - 1);
15466 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15467 if (tmp == NULL_RTX)
15468 return false;
15469 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15473 gcc_assert (operands[0] != NULL_RTX);
15474 gcc_assert (operands[1] != NULL_RTX);
15475 gcc_assert (REGNO (operands[0]) % 2 == 0);
15476 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15480 /* Make sure the instructions are ordered with lower memory access first. */
15481 if (offsets[0] > offsets[1])
15483 gap = offsets[0] - offsets[1];
15484 offset = offsets[1];
15486 /* Swap the instructions such that lower memory is accessed first. */
15487 SWAP_RTX (operands[0], operands[1]);
15488 SWAP_RTX (operands[2], operands[3]);
15489 if (const_store)
15490 SWAP_RTX (operands[4], operands[5]);
15492 else
15494 gap = offsets[1] - offsets[0];
15495 offset = offsets[0];
15498 /* Make sure accesses are to consecutive memory locations. */
15499 if (gap != 4)
15500 return false;
15502 /* Make sure we generate legal instructions. */
15503 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15504 false, load))
15505 return true;
15507 /* In Thumb state, where registers are almost unconstrained, there
15508 is little hope of fixing it. */
15509 if (TARGET_THUMB2)
15510 return false;
15512 if (load && commute)
15514 /* Try reordering registers. */
15515 SWAP_RTX (operands[0], operands[1]);
15516 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15517 false, load))
15518 return true;
15521 if (const_store)
15523 /* If input registers are dead after this pattern, they can be
15524 reordered or replaced by other registers that are free in the
15525 current pattern. */
15526 if (!peep2_reg_dead_p (4, operands[0])
15527 || !peep2_reg_dead_p (4, operands[1]))
15528 return false;
15530 /* Try to reorder the input registers. */
15531 /* For example, the code
15532 mov r0, 0
15533 mov r1, 1
15534 str r1, [r2]
15535 str r0, [r2, #4]
15536 can be transformed into
15537 mov r1, 0
15538 mov r0, 1
15539 strd r0, r1, [r2].  */
15541 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15542 false, false))
15544 SWAP_RTX (operands[0], operands[1]);
15545 return true;
15548 /* Try to find a free DI register. */
15549 CLEAR_HARD_REG_SET (regset);
15550 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15551 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15552 while (true)
15554 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15555 if (tmp == NULL_RTX)
15556 return false;
15558 /* DREG must be an even-numbered register in DImode.
15559 Split it into SI registers. */
15560 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15561 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15562 gcc_assert (operands[0] != NULL_RTX);
15563 gcc_assert (operands[1] != NULL_RTX);
15564 gcc_assert (REGNO (operands[0]) % 2 == 0);
15565 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15567 return (operands_ok_ldrd_strd (operands[0], operands[1],
15568 base, offset,
15569 false, load));
15573 return false;
15575 #undef SWAP_RTX
15580 /* Print a symbolic form of X to the debug file, F. */
15581 static void
15582 arm_print_value (FILE *f, rtx x)
15584 switch (GET_CODE (x))
15586 case CONST_INT:
15587 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15588 return;
15590 case CONST_DOUBLE:
15591 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15592 return;
15594 case CONST_VECTOR:
15596 int i;
15598 fprintf (f, "<");
15599 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15601 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15602 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15603 fputc (',', f);
15605 fprintf (f, ">");
15607 return;
15609 case CONST_STRING:
15610 fprintf (f, "\"%s\"", XSTR (x, 0));
15611 return;
15613 case SYMBOL_REF:
15614 fprintf (f, "`%s'", XSTR (x, 0));
15615 return;
15617 case LABEL_REF:
15618 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15619 return;
15621 case CONST:
15622 arm_print_value (f, XEXP (x, 0));
15623 return;
15625 case PLUS:
15626 arm_print_value (f, XEXP (x, 0));
15627 fprintf (f, "+");
15628 arm_print_value (f, XEXP (x, 1));
15629 return;
15631 case PC:
15632 fprintf (f, "pc");
15633 return;
15635 default:
15636 fprintf (f, "????");
15637 return;
15641 /* Routines for manipulation of the constant pool. */
15643 /* Arm instructions cannot load a large constant directly into a
15644 register; they have to come from a pc relative load. The constant
15645 must therefore be placed in the addressable range of the pc
15646 relative load. Depending on the precise pc relative load
15647 instruction, the range is somewhere between 256 bytes and 4k. This
15648 means that we often have to dump a constant inside a function, and
15649 generate code to branch around it.
15651 It is important to minimize this, since the branches will slow
15652 things down and make the code larger.
15654 Normally we can hide the table after an existing unconditional
15655 branch so that there is no interruption of the flow, but in the
15656 worst case the code looks like this:
15658 ldr rn, L1
15660 b L2
15661 align
15662 L1: .long value
15666 ldr rn, L3
15668 b L4
15669 align
15670 L3: .long value
15674 We fix this by performing a scan after scheduling, which notices
15675 which instructions need to have their operands fetched from the
15676 constant table and builds the table.
15678 The algorithm starts by building a table of all the constants that
15679 need fixing up and all the natural barriers in the function (places
15680 where a constant table can be dropped without breaking the flow).
15681 For each fixup we note how far the pc-relative replacement will be
15682 able to reach and the offset of the instruction into the function.
15684 Having built the table we then group the fixes together to form
15685 tables that are as large as possible (subject to addressing
15686 constraints) and emit each table of constants after the last
15687 barrier that is within range of all the instructions in the group.
15688 If a group does not contain a barrier, then we forcibly create one
15689 by inserting a jump instruction into the flow. Once the table has
15690 been inserted, the insns are then modified to reference the
15691 relevant entry in the pool.
15693 Possible enhancements to the algorithm (not implemented) are:
15695 1) For some processors and object formats, there may be benefit in
15696 aligning the pools to the start of cache lines; this alignment
15697 would need to be taken into account when calculating addressability
15698 of a pool. */
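/* For illustration only (not real compiler output), after this pass has
   run a stretch of the function body might look like:

	ldr	r3, .LCP0	@ fix rewritten to reference the pool entry
	...
	b	.Lskip		@ barrier, forced if no natural one was in range
	.align	2
   .LCP0:
	.word	0x12345678	@ minipool entry at offset 0
	.word	0xdeadbeef	@ minipool entry at offset 4
   .Lskip:
	...  */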
15700 /* These typedefs are located at the start of this file, so that
15701 they can be used in the prototypes there. This comment is to
15702 remind readers of that fact so that the following structures
15703 can be understood more easily.
15705 typedef struct minipool_node Mnode;
15706 typedef struct minipool_fixup Mfix; */
15708 struct minipool_node
15710 /* Doubly linked chain of entries. */
15711 Mnode * next;
15712 Mnode * prev;
15713 /* The maximum offset into the code at which this entry can be placed. While
15714 pushing fixes for forward references, all entries are sorted in order
15715 of increasing max_address. */
15716 HOST_WIDE_INT max_address;
15717 /* Similarly for an entry inserted for a backwards ref. */
15718 HOST_WIDE_INT min_address;
15719 /* The number of fixes referencing this entry. This can become zero
15720 if we "unpush" an entry. In this case we ignore the entry when we
15721 come to emit the code. */
15722 int refcount;
15723 /* The offset from the start of the minipool. */
15724 HOST_WIDE_INT offset;
15725 /* The value in the table. */
15726 rtx value;
15727 /* The mode of value. */
15728 enum machine_mode mode;
15729 /* The size of the value. With iWMMXt enabled,
15730 sizes > 4 also imply an alignment of 8 bytes. */
15731 int fix_size;
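/* A fix records one instruction whose operand must be rewritten to
   reference a minipool entry: the INSN itself, its ADDRESS (offset from
   the start of the function), the location LOC of the operand to patch,
   the constant VALUE with its MODE and padded FIX_SIZE, the pool entry
   MINIPOOL eventually assigned to it, and the FORWARDS/BACKWARDS reach
   of the pc-relative load (see push_minipool_fix below).  */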
15734 struct minipool_fixup
15736 Mfix * next;
15737 rtx insn;
15738 HOST_WIDE_INT address;
15739 rtx * loc;
15740 enum machine_mode mode;
15741 int fix_size;
15742 rtx value;
15743 Mnode * minipool;
15744 HOST_WIDE_INT forwards;
15745 HOST_WIDE_INT backwards;
15748 /* Fixes less than a word need padding out to a word boundary. */
15749 #define MINIPOOL_FIX_SIZE(mode) \
15750 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
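/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both evaluate to 4 (padded up to a word), while MINIPOOL_FIX_SIZE (DImode)
   is 8.  */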
15752 static Mnode * minipool_vector_head;
15753 static Mnode * minipool_vector_tail;
15754 static rtx minipool_vector_label;
15755 static int minipool_pad;
15757 /* The linked list of all minipool fixes required for this function. */
15758 Mfix * minipool_fix_head;
15759 Mfix * minipool_fix_tail;
15760 /* The fix entry for the current minipool, once it has been placed. */
15761 Mfix * minipool_barrier;
15763 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15764 #define JUMP_TABLES_IN_TEXT_SECTION 0
15765 #endif
15767 static HOST_WIDE_INT
15768 get_jump_table_size (rtx insn)
15770 /* ADDR_VECs only take room if read-only data goes into the text
15771 section. */
15772 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15774 rtx body = PATTERN (insn);
15775 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15776 HOST_WIDE_INT size;
15777 HOST_WIDE_INT modesize;
15779 modesize = GET_MODE_SIZE (GET_MODE (body));
15780 size = modesize * XVECLEN (body, elt);
15781 switch (modesize)
15783 case 1:
15784 /* Round up size of TBB table to a halfword boundary. */
15785 size = (size + 1) & ~(HOST_WIDE_INT)1;
15786 break;
15787 case 2:
15788 /* No padding necessary for TBH. */
15789 break;
15790 case 4:
15791 /* Add two bytes for alignment on Thumb. */
15792 if (TARGET_THUMB)
15793 size += 2;
15794 break;
15795 default:
15796 gcc_unreachable ();
15798 return size;
15801 return 0;
15804 /* Return the maximum amount of padding that will be inserted before
15805 label LABEL. */
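/* For example, with TARGET_THUMB (minimum insn size of 2 bytes) a label
   aligned to an 8-byte boundary can be preceded by up to 8 - 2 = 6 bytes
   of padding, so 6 is returned for it.  */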
15807 static HOST_WIDE_INT
15808 get_label_padding (rtx label)
15810 HOST_WIDE_INT align, min_insn_size;
15812 align = 1 << label_to_alignment (label);
15813 min_insn_size = TARGET_THUMB ? 2 : 4;
15814 return align > min_insn_size ? align - min_insn_size : 0;
15817 /* Move a minipool fix MP from its current location to before MAX_MP.
15818 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15819 constraints may need updating. */
15820 static Mnode *
15821 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15822 HOST_WIDE_INT max_address)
15824 /* The code below assumes these are different. */
15825 gcc_assert (mp != max_mp);
15827 if (max_mp == NULL)
15829 if (max_address < mp->max_address)
15830 mp->max_address = max_address;
15832 else
15834 if (max_address > max_mp->max_address - mp->fix_size)
15835 mp->max_address = max_mp->max_address - mp->fix_size;
15836 else
15837 mp->max_address = max_address;
15839 /* Unlink MP from its current position. Since max_mp is non-null,
15840 mp->prev must be non-null. */
15841 mp->prev->next = mp->next;
15842 if (mp->next != NULL)
15843 mp->next->prev = mp->prev;
15844 else
15845 minipool_vector_tail = mp->prev;
15847 /* Re-insert it before MAX_MP. */
15848 mp->next = max_mp;
15849 mp->prev = max_mp->prev;
15850 max_mp->prev = mp;
15852 if (mp->prev != NULL)
15853 mp->prev->next = mp;
15854 else
15855 minipool_vector_head = mp;
15858 /* Save the new entry. */
15859 max_mp = mp;
15861 /* Scan over the preceding entries and adjust their addresses as
15862 required. */
15863 while (mp->prev != NULL
15864 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15866 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15867 mp = mp->prev;
15870 return max_mp;
15873 /* Add a constant to the minipool for a forward reference. Returns the
15874 node added or NULL if the constant will not fit in this pool. */
15875 static Mnode *
15876 add_minipool_forward_ref (Mfix *fix)
15878 /* If set, max_mp is the first pool_entry that has a lower
15879 constraint than the one we are trying to add. */
15880 Mnode * max_mp = NULL;
15881 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15882 Mnode * mp;
15884 /* If the minipool starts before the end of FIX->INSN then this FIX
15885 can not be placed into the current pool. Furthermore, adding the
15886 new constant pool entry may cause the pool to start FIX_SIZE bytes
15887 earlier. */
15888 if (minipool_vector_head &&
15889 (fix->address + get_attr_length (fix->insn)
15890 >= minipool_vector_head->max_address - fix->fix_size))
15891 return NULL;
15893 /* Scan the pool to see if a constant with the same value has
15894 already been added. While we are doing this, also note the
15895 location where we must insert the constant if it doesn't already
15896 exist. */
15897 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15899 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15900 && fix->mode == mp->mode
15901 && (!LABEL_P (fix->value)
15902 || (CODE_LABEL_NUMBER (fix->value)
15903 == CODE_LABEL_NUMBER (mp->value)))
15904 && rtx_equal_p (fix->value, mp->value))
15906 /* More than one fix references this entry. */
15907 mp->refcount++;
15908 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15911 /* Note the insertion point if necessary. */
15912 if (max_mp == NULL
15913 && mp->max_address > max_address)
15914 max_mp = mp;
15916 /* If we are inserting an 8-byte aligned quantity and
15917 we have not already found an insertion point, then
15918 make sure that all such 8-byte aligned quantities are
15919 placed at the start of the pool. */
15920 if (ARM_DOUBLEWORD_ALIGN
15921 && max_mp == NULL
15922 && fix->fix_size >= 8
15923 && mp->fix_size < 8)
15925 max_mp = mp;
15926 max_address = mp->max_address;
15930 /* The value is not currently in the minipool, so we need to create
15931 a new entry for it. If MAX_MP is NULL, the entry will be put on
15932 the end of the list since the placement is less constrained than
15933 any existing entry. Otherwise, we insert the new fix before
15934 MAX_MP and, if necessary, adjust the constraints on the other
15935 entries. */
15936 mp = XNEW (Mnode);
15937 mp->fix_size = fix->fix_size;
15938 mp->mode = fix->mode;
15939 mp->value = fix->value;
15940 mp->refcount = 1;
15941 /* Not yet required for a backwards ref. */
15942 mp->min_address = -65536;
15944 if (max_mp == NULL)
15946 mp->max_address = max_address;
15947 mp->next = NULL;
15948 mp->prev = minipool_vector_tail;
15950 if (mp->prev == NULL)
15952 minipool_vector_head = mp;
15953 minipool_vector_label = gen_label_rtx ();
15955 else
15956 mp->prev->next = mp;
15958 minipool_vector_tail = mp;
15960 else
15962 if (max_address > max_mp->max_address - mp->fix_size)
15963 mp->max_address = max_mp->max_address - mp->fix_size;
15964 else
15965 mp->max_address = max_address;
15967 mp->next = max_mp;
15968 mp->prev = max_mp->prev;
15969 max_mp->prev = mp;
15970 if (mp->prev != NULL)
15971 mp->prev->next = mp;
15972 else
15973 minipool_vector_head = mp;
15976 /* Save the new entry. */
15977 max_mp = mp;
15979 /* Scan over the preceding entries and adjust their addresses as
15980 required. */
15981 while (mp->prev != NULL
15982 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15984 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15985 mp = mp->prev;
15988 return max_mp;
15991 static Mnode *
15992 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15993 HOST_WIDE_INT min_address)
15995 HOST_WIDE_INT offset;
15997 /* The code below assumes these are different. */
15998 gcc_assert (mp != min_mp);
16000 if (min_mp == NULL)
16002 if (min_address > mp->min_address)
16003 mp->min_address = min_address;
16005 else
16007 /* We will adjust this below if it is too loose. */
16008 mp->min_address = min_address;
16010 /* Unlink MP from its current position. Since min_mp is non-null,
16011 mp->next must be non-null. */
16012 mp->next->prev = mp->prev;
16013 if (mp->prev != NULL)
16014 mp->prev->next = mp->next;
16015 else
16016 minipool_vector_head = mp->next;
16018 /* Reinsert it after MIN_MP. */
16019 mp->prev = min_mp;
16020 mp->next = min_mp->next;
16021 min_mp->next = mp;
16022 if (mp->next != NULL)
16023 mp->next->prev = mp;
16024 else
16025 minipool_vector_tail = mp;
16028 min_mp = mp;
16030 offset = 0;
16031 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16033 mp->offset = offset;
16034 if (mp->refcount > 0)
16035 offset += mp->fix_size;
16037 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16038 mp->next->min_address = mp->min_address + mp->fix_size;
16041 return min_mp;
16044 /* Add a constant to the minipool for a backward reference. Returns the
16045 node added or NULL if the constant will not fit in this pool.
16047 Note that the code for insertion for a backwards reference can be
16048 somewhat confusing because the calculated offsets for each fix do
16049 not take into account the size of the pool (which is still under
16050 construction). */
16051 static Mnode *
16052 add_minipool_backward_ref (Mfix *fix)
16054 /* If set, min_mp is the last pool_entry that has a lower constraint
16055 than the one we are trying to add. */
16056 Mnode *min_mp = NULL;
16057 /* This can be negative, since it is only a constraint. */
16058 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16059 Mnode *mp;
16061 /* If we can't reach the current pool from this insn, or if we can't
16062 insert this entry at the end of the pool without pushing other
16063 fixes out of range, then we don't try. This ensures that we
16064 can't fail later on. */
16065 if (min_address >= minipool_barrier->address
16066 || (minipool_vector_tail->min_address + fix->fix_size
16067 >= minipool_barrier->address))
16068 return NULL;
16070 /* Scan the pool to see if a constant with the same value has
16071 already been added. While we are doing this, also note the
16072 location where we must insert the constant if it doesn't already
16073 exist. */
16074 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16076 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16077 && fix->mode == mp->mode
16078 && (!LABEL_P (fix->value)
16079 || (CODE_LABEL_NUMBER (fix->value)
16080 == CODE_LABEL_NUMBER (mp->value)))
16081 && rtx_equal_p (fix->value, mp->value)
16082 /* Check that there is enough slack to move this entry to the
16083 end of the table (this is conservative). */
16084 && (mp->max_address
16085 > (minipool_barrier->address
16086 + minipool_vector_tail->offset
16087 + minipool_vector_tail->fix_size)))
16089 mp->refcount++;
16090 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16093 if (min_mp != NULL)
16094 mp->min_address += fix->fix_size;
16095 else
16097 /* Note the insertion point if necessary. */
16098 if (mp->min_address < min_address)
16100 /* For now, we do not allow the insertion of nodes requiring 8-byte
16101 alignment anywhere but at the start of the pool. */
16102 if (ARM_DOUBLEWORD_ALIGN
16103 && fix->fix_size >= 8 && mp->fix_size < 8)
16104 return NULL;
16105 else
16106 min_mp = mp;
16108 else if (mp->max_address
16109 < minipool_barrier->address + mp->offset + fix->fix_size)
16111 /* Inserting before this entry would push the fix beyond
16112 its maximum address (which can happen if we have
16113 re-located a forwards fix); force the new fix to come
16114 after it. */
16115 if (ARM_DOUBLEWORD_ALIGN
16116 && fix->fix_size >= 8 && mp->fix_size < 8)
16117 return NULL;
16118 else
16120 min_mp = mp;
16121 min_address = mp->min_address + fix->fix_size;
16124 /* Do not insert a non-8-byte aligned quantity before 8-byte
16125 aligned quantities. */
16126 else if (ARM_DOUBLEWORD_ALIGN
16127 && fix->fix_size < 8
16128 && mp->fix_size >= 8)
16130 min_mp = mp;
16131 min_address = mp->min_address + fix->fix_size;
16136 /* We need to create a new entry. */
16137 mp = XNEW (Mnode);
16138 mp->fix_size = fix->fix_size;
16139 mp->mode = fix->mode;
16140 mp->value = fix->value;
16141 mp->refcount = 1;
16142 mp->max_address = minipool_barrier->address + 65536;
16144 mp->min_address = min_address;
16146 if (min_mp == NULL)
16148 mp->prev = NULL;
16149 mp->next = minipool_vector_head;
16151 if (mp->next == NULL)
16153 minipool_vector_tail = mp;
16154 minipool_vector_label = gen_label_rtx ();
16156 else
16157 mp->next->prev = mp;
16159 minipool_vector_head = mp;
16161 else
16163 mp->next = min_mp->next;
16164 mp->prev = min_mp;
16165 min_mp->next = mp;
16167 if (mp->next != NULL)
16168 mp->next->prev = mp;
16169 else
16170 minipool_vector_tail = mp;
16173 /* Save the new entry. */
16174 min_mp = mp;
16176 if (mp->prev)
16177 mp = mp->prev;
16178 else
16179 mp->offset = 0;
16181 /* Scan over the following entries and adjust their offsets. */
16182 while (mp->next != NULL)
16184 if (mp->next->min_address < mp->min_address + mp->fix_size)
16185 mp->next->min_address = mp->min_address + mp->fix_size;
16187 if (mp->refcount)
16188 mp->next->offset = mp->offset + mp->fix_size;
16189 else
16190 mp->next->offset = mp->offset;
16192 mp = mp->next;
16195 return min_mp;
16198 static void
16199 assign_minipool_offsets (Mfix *barrier)
16201 HOST_WIDE_INT offset = 0;
16202 Mnode *mp;
16204 minipool_barrier = barrier;
16206 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16208 mp->offset = offset;
16210 if (mp->refcount > 0)
16211 offset += mp->fix_size;
16215 /* Output the literal table. */
16216 static void
16217 dump_minipool (rtx scan)
16219 Mnode * mp;
16220 Mnode * nmp;
16221 int align64 = 0;
16223 if (ARM_DOUBLEWORD_ALIGN)
16224 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16225 if (mp->refcount > 0 && mp->fix_size >= 8)
16227 align64 = 1;
16228 break;
16231 if (dump_file)
16232 fprintf (dump_file,
16233 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16234 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16236 scan = emit_label_after (gen_label_rtx (), scan);
16237 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16238 scan = emit_label_after (minipool_vector_label, scan);
16240 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16242 if (mp->refcount > 0)
16244 if (dump_file)
16246 fprintf (dump_file,
16247 ";; Offset %u, min %ld, max %ld ",
16248 (unsigned) mp->offset, (unsigned long) mp->min_address,
16249 (unsigned long) mp->max_address);
16250 arm_print_value (dump_file, mp->value);
16251 fputc ('\n', dump_file);
16254 switch (mp->fix_size)
16256 #ifdef HAVE_consttable_1
16257 case 1:
16258 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16259 break;
16261 #endif
16262 #ifdef HAVE_consttable_2
16263 case 2:
16264 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16265 break;
16267 #endif
16268 #ifdef HAVE_consttable_4
16269 case 4:
16270 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16271 break;
16273 #endif
16274 #ifdef HAVE_consttable_8
16275 case 8:
16276 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16277 break;
16279 #endif
16280 #ifdef HAVE_consttable_16
16281 case 16:
16282 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16283 break;
16285 #endif
16286 default:
16287 gcc_unreachable ();
16291 nmp = mp->next;
16292 free (mp);
16295 minipool_vector_head = minipool_vector_tail = NULL;
16296 scan = emit_insn_after (gen_consttable_end (), scan);
16297 scan = emit_barrier_after (scan);
16300 /* Return the cost of forcibly inserting a barrier after INSN. */
16301 static int
16302 arm_barrier_cost (rtx insn)
16304 /* Basing the location of the pool on the loop depth is preferable,
16305 but at the moment, the basic block information seems to be
16306 corrupted by this stage of the compilation. */
16307 int base_cost = 50;
16308 rtx next = next_nonnote_insn (insn);
16310 if (next != NULL && LABEL_P (next))
16311 base_cost -= 20;
16313 switch (GET_CODE (insn))
16315 case CODE_LABEL:
16316 /* It will always be better to place the table before the label, rather
16317 than after it. */
16318 return 50;
16320 case INSN:
16321 case CALL_INSN:
16322 return base_cost;
16324 case JUMP_INSN:
16325 return base_cost - 10;
16327 default:
16328 return base_cost + 10;
16332 /* Find the best place in the insn stream in the range
16333 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16334 Create the barrier by inserting a jump and add a new fix entry for
16335 it. */
16336 static Mfix *
16337 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16339 HOST_WIDE_INT count = 0;
16340 rtx barrier;
16341 rtx from = fix->insn;
16342 /* The instruction after which we will insert the jump. */
16343 rtx selected = NULL;
16344 int selected_cost;
16345 /* The address at which the jump instruction will be placed. */
16346 HOST_WIDE_INT selected_address;
16347 Mfix * new_fix;
16348 HOST_WIDE_INT max_count = max_address - fix->address;
16349 rtx label = gen_label_rtx ();
16351 selected_cost = arm_barrier_cost (from);
16352 selected_address = fix->address;
16354 while (from && count < max_count)
16356 rtx tmp;
16357 int new_cost;
16359 /* This code shouldn't have been called if there was a natural barrier
16360 within range. */
16361 gcc_assert (!BARRIER_P (from));
16363 /* Count the length of this insn. This must stay in sync with the
16364 code that pushes minipool fixes. */
16365 if (LABEL_P (from))
16366 count += get_label_padding (from);
16367 else
16368 count += get_attr_length (from);
16370 /* If there is a jump table, add its length. */
16371 if (tablejump_p (from, NULL, &tmp))
16373 count += get_jump_table_size (tmp);
16375 /* Jump tables aren't in a basic block, so base the cost on
16376 the dispatch insn. If we select this location, we will
16377 still put the pool after the table. */
16378 new_cost = arm_barrier_cost (from);
16380 if (count < max_count
16381 && (!selected || new_cost <= selected_cost))
16383 selected = tmp;
16384 selected_cost = new_cost;
16385 selected_address = fix->address + count;
16388 /* Continue after the dispatch table. */
16389 from = NEXT_INSN (tmp);
16390 continue;
16393 new_cost = arm_barrier_cost (from);
16395 if (count < max_count
16396 && (!selected || new_cost <= selected_cost))
16398 selected = from;
16399 selected_cost = new_cost;
16400 selected_address = fix->address + count;
16403 from = NEXT_INSN (from);
16406 /* Make sure that we found a place to insert the jump. */
16407 gcc_assert (selected);
16409 /* Make sure we do not split a call and its corresponding
16410 CALL_ARG_LOCATION note. */
16411 if (CALL_P (selected))
16413 rtx next = NEXT_INSN (selected);
16414 if (next && NOTE_P (next)
16415 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16416 selected = next;
16419 /* Create a new JUMP_INSN that branches around a barrier. */
16420 from = emit_jump_insn_after (gen_jump (label), selected);
16421 JUMP_LABEL (from) = label;
16422 barrier = emit_barrier_after (from);
16423 emit_label_after (label, barrier);
16425 /* Create a minipool barrier entry for the new barrier. */
16426 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16427 new_fix->insn = barrier;
16428 new_fix->address = selected_address;
16429 new_fix->next = fix->next;
16430 fix->next = new_fix;
16432 return new_fix;
16435 /* Record that there is a natural barrier in the insn stream at
16436 ADDRESS. */
16437 static void
16438 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16440 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16442 fix->insn = insn;
16443 fix->address = address;
16445 fix->next = NULL;
16446 if (minipool_fix_head != NULL)
16447 minipool_fix_tail->next = fix;
16448 else
16449 minipool_fix_head = fix;
16451 minipool_fix_tail = fix;
16454 /* Record INSN, which will need fixing up to load a value from the
16455 minipool. ADDRESS is the offset of the insn from the start of the
16456 function; LOC is a pointer to the part of the insn which requires
16457 fixing; VALUE is the constant that must be loaded, which is of type
16458 MODE. */
16459 static void
16460 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16461 enum machine_mode mode, rtx value)
16463 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16465 fix->insn = insn;
16466 fix->address = address;
16467 fix->loc = loc;
16468 fix->mode = mode;
16469 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16470 fix->value = value;
16471 fix->forwards = get_attr_pool_range (insn);
16472 fix->backwards = get_attr_neg_pool_range (insn);
16473 fix->minipool = NULL;
16475 /* If an insn doesn't have a range defined for it, then it isn't
16476 expecting to be reworked by this code. Better to stop now than
16477 to generate duff assembly code. */
16478 gcc_assert (fix->forwards || fix->backwards);
16480 /* If an entry requires 8-byte alignment then assume all constant pools
16481 require 4 bytes of padding. Trying to do this later on a per-pool
16482 basis is awkward because existing pool entries have to be modified. */
16483 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16484 minipool_pad = 4;
16486 if (dump_file)
16488 fprintf (dump_file,
16489 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16490 GET_MODE_NAME (mode),
16491 INSN_UID (insn), (unsigned long) address,
16492 -1 * (long)fix->backwards, (long)fix->forwards);
16493 arm_print_value (dump_file, fix->value);
16494 fprintf (dump_file, "\n");
16497 /* Add it to the chain of fixes. */
16498 fix->next = NULL;
16500 if (minipool_fix_head != NULL)
16501 minipool_fix_tail->next = fix;
16502 else
16503 minipool_fix_head = fix;
16505 minipool_fix_tail = fix;
16508 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16509 Returns the number of insns needed, or 99 if we always want to synthesize
16510 the value. */
16511 int
16512 arm_max_const_double_inline_cost ()
16514 /* Let the value get synthesized to avoid the use of literal pools. */
16515 if (arm_disable_literal_pool)
16516 return 99;
16518 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16521 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16522 Returns the number of insns needed, or 99 if we don't know how to
16523 do it. */
16524 int
16525 arm_const_double_inline_cost (rtx val)
16527 rtx lowpart, highpart;
16528 enum machine_mode mode;
16530 mode = GET_MODE (val);
16532 if (mode == VOIDmode)
16533 mode = DImode;
16535 gcc_assert (GET_MODE_SIZE (mode) == 8);
16537 lowpart = gen_lowpart (SImode, val);
16538 highpart = gen_highpart_mode (SImode, mode, val);
16540 gcc_assert (CONST_INT_P (lowpart));
16541 gcc_assert (CONST_INT_P (highpart));
16543 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16544 NULL_RTX, NULL_RTX, 0, 0)
16545 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16546 NULL_RTX, NULL_RTX, 0, 0));
16549 /* Return true if it is worthwhile to split a 64-bit constant into two
16550 32-bit operations. This is the case if optimizing for size, or
16551 if we have load delay slots, or if one 32-bit part can be done with
16552 a single data operation. */
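/* For example (illustrative), the constant 0xff00000000000001 splits
   well: its high word 0xff000000 and its low word 0x00000001 are both
   valid ARM immediates, so each half needs only a single data-processing
   insn.  */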
16553 bool
16554 arm_const_double_by_parts (rtx val)
16556 enum machine_mode mode = GET_MODE (val);
16557 rtx part;
16559 if (optimize_size || arm_ld_sched)
16560 return true;
16562 if (mode == VOIDmode)
16563 mode = DImode;
16565 part = gen_highpart_mode (SImode, mode, val);
16567 gcc_assert (CONST_INT_P (part));
16569 if (const_ok_for_arm (INTVAL (part))
16570 || const_ok_for_arm (~INTVAL (part)))
16571 return true;
16573 part = gen_lowpart (SImode, val);
16575 gcc_assert (CONST_INT_P (part));
16577 if (const_ok_for_arm (INTVAL (part))
16578 || const_ok_for_arm (~INTVAL (part)))
16579 return true;
16581 return false;
16584 /* Return true if it is possible to inline both the high and low parts
16585 of a 64-bit constant into 32-bit data processing instructions. */
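/* For example (illustrative), 0x0000002a000000ff qualifies, since both
   0x2a and 0xff are valid immediates, whereas 0x12345678000000ff does
   not, because 0x12345678 cannot be encoded as an ARM immediate.  */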
16586 bool
16587 arm_const_double_by_immediates (rtx val)
16589 enum machine_mode mode = GET_MODE (val);
16590 rtx part;
16592 if (mode == VOIDmode)
16593 mode = DImode;
16595 part = gen_highpart_mode (SImode, mode, val);
16597 gcc_assert (CONST_INT_P (part));
16599 if (!const_ok_for_arm (INTVAL (part)))
16600 return false;
16602 part = gen_lowpart (SImode, val);
16604 gcc_assert (CONST_INT_P (part));
16606 if (!const_ok_for_arm (INTVAL (part)))
16607 return false;
16609 return true;
16612 /* Scan INSN and note any of its operands that need fixing.
16613 If DO_PUSHES is false we do not actually push any of the fixups
16614 needed. */
16615 static void
16616 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16618 int opno;
16620 extract_insn (insn);
16622 if (!constrain_operands (1))
16623 fatal_insn_not_found (insn);
16625 if (recog_data.n_alternatives == 0)
16626 return;
16628 /* Fill in recog_op_alt with information about the constraints of
16629 this insn. */
16630 preprocess_constraints ();
16632 for (opno = 0; opno < recog_data.n_operands; opno++)
16634 /* Things we need to fix can only occur in inputs. */
16635 if (recog_data.operand_type[opno] != OP_IN)
16636 continue;
16638 /* If this alternative is a memory reference, then any mention
16639 of constants in this alternative is really to fool reload
16640 into allowing us to accept one there. We need to fix them up
16641 now so that we output the right code. */
16642 if (recog_op_alt[opno][which_alternative].memory_ok)
16644 rtx op = recog_data.operand[opno];
16646 if (CONSTANT_P (op))
16648 if (do_pushes)
16649 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16650 recog_data.operand_mode[opno], op);
16652 else if (MEM_P (op)
16653 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16654 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16656 if (do_pushes)
16658 rtx cop = avoid_constant_pool_reference (op);
16660 /* Casting the address of something to a mode narrower
16661 than a word can cause avoid_constant_pool_reference()
16662 to return the pool reference itself. That's no good to
16663 us here. Let's just hope that we can use the
16664 constant pool value directly. */
16665 if (op == cop)
16666 cop = get_pool_constant (XEXP (op, 0));
16668 push_minipool_fix (insn, address,
16669 recog_data.operand_loc[opno],
16670 recog_data.operand_mode[opno], cop);
16677 return;
16680 /* Rewrite a move insn into a subtract of 0 if the condition codes will
16681 be useful in the next conditional jump insn. */
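/* For example (an illustrative sketch of the transformation), a sequence
   such as

	mov	r3, r2
	cmp	r3, #0
	beq	.L1

   can become

	subs	r3, r2, #0
	beq	.L1

   because the subtract of 0 leaves the condition codes describing r3, so
   the separate compare against zero can be dropped.  */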
16683 static void
16684 thumb1_reorg (void)
16686 basic_block bb;
16688 FOR_EACH_BB_FN (bb, cfun)
16690 rtx dest, src;
16691 rtx pat, op0, set = NULL;
16692 rtx prev, insn = BB_END (bb);
16693 bool insn_clobbered = false;
16695 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
16696 insn = PREV_INSN (insn);
16698 /* Find the last cbranchsi4_insn in basic block BB. */
16699 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16700 continue;
16702 /* Get the register with which we are comparing. */
16703 pat = PATTERN (insn);
16704 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16706 /* Find the first flag setting insn before INSN in basic block BB. */
16707 gcc_assert (insn != BB_HEAD (bb));
16708 for (prev = PREV_INSN (insn);
16709 (!insn_clobbered
16710 && prev != BB_HEAD (bb)
16711 && (NOTE_P (prev)
16712 || DEBUG_INSN_P (prev)
16713 || ((set = single_set (prev)) != NULL
16714 && get_attr_conds (prev) == CONDS_NOCOND)));
16715 prev = PREV_INSN (prev))
16717 if (reg_set_p (op0, prev))
16718 insn_clobbered = true;
16721 /* Skip if op0 is clobbered by an insn other than prev. */
16722 if (insn_clobbered)
16723 continue;
16725 if (!set)
16726 continue;
16728 dest = SET_DEST (set);
16729 src = SET_SRC (set);
16730 if (!low_register_operand (dest, SImode)
16731 || !low_register_operand (src, SImode))
16732 continue;
16734 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16735 in INSN. Both src and dest of the move insn are checked. */
16736 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16738 dest = copy_rtx (dest);
16739 src = copy_rtx (src);
16740 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16741 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16742 INSN_CODE (prev) = -1;
16743 /* Set test register in INSN to dest. */
16744 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16745 INSN_CODE (insn) = -1;
16750 /* Convert instructions to their cc-clobbering variant if possible, since
16751 that allows us to use smaller encodings. */
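/* For example (sketch): when the condition codes are dead after the insn,

	add	r0, r1, #4	@ needs a 32-bit Thumb-2 encoding

   can be rewritten as the flag-setting form

	adds	r0, r1, #4	@ 16-bit encoding

   which the loop below does by adding a clobber of the CC register to
   the pattern.  */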
16753 static void
16754 thumb2_reorg (void)
16756 basic_block bb;
16757 regset_head live;
16759 INIT_REG_SET (&live);
16761 /* We are freeing block_for_insn in the toplev to keep compatibility
16762 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16763 compute_bb_for_insn ();
16764 df_analyze ();
16766 FOR_EACH_BB_FN (bb, cfun)
16768 rtx insn;
16770 COPY_REG_SET (&live, DF_LR_OUT (bb));
16771 df_simulate_initialize_backwards (bb, &live);
16772 FOR_BB_INSNS_REVERSE (bb, insn)
16774 if (NONJUMP_INSN_P (insn)
16775 && !REGNO_REG_SET_P (&live, CC_REGNUM)
16776 && GET_CODE (PATTERN (insn)) == SET)
16778 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
16779 rtx pat = PATTERN (insn);
16780 rtx dst = XEXP (pat, 0);
16781 rtx src = XEXP (pat, 1);
16782 rtx op0 = NULL_RTX, op1 = NULL_RTX;
16784 if (!OBJECT_P (src))
16785 op0 = XEXP (src, 0);
16787 if (BINARY_P (src))
16788 op1 = XEXP (src, 1);
16790 if (low_register_operand (dst, SImode))
16792 switch (GET_CODE (src))
16794 case PLUS:
16795 /* Adding two registers and storing the result
16796 in the first source is already a 16-bit
16797 operation. */
16798 if (rtx_equal_p (dst, op0)
16799 && register_operand (op1, SImode))
16800 break;
16802 if (low_register_operand (op0, SImode))
16804 /* ADDS <Rd>,<Rn>,<Rm> */
16805 if (low_register_operand (op1, SImode))
16806 action = CONV;
16807 /* ADDS <Rdn>,#<imm8> */
16808 /* SUBS <Rdn>,#<imm8> */
16809 else if (rtx_equal_p (dst, op0)
16810 && CONST_INT_P (op1)
16811 && IN_RANGE (INTVAL (op1), -255, 255))
16812 action = CONV;
16813 /* ADDS <Rd>,<Rn>,#<imm3> */
16814 /* SUBS <Rd>,<Rn>,#<imm3> */
16815 else if (CONST_INT_P (op1)
16816 && IN_RANGE (INTVAL (op1), -7, 7))
16817 action = CONV;
16819 /* ADCS <Rd>, <Rn> */
16820 else if (GET_CODE (XEXP (src, 0)) == PLUS
16821 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
16822 && low_register_operand (XEXP (XEXP (src, 0), 1),
16823 SImode)
16824 && COMPARISON_P (op1)
16825 && cc_register (XEXP (op1, 0), VOIDmode)
16826 && maybe_get_arm_condition_code (op1) == ARM_CS
16827 && XEXP (op1, 1) == const0_rtx)
16828 action = CONV;
16829 break;
16831 case MINUS:
16832 /* RSBS <Rd>,<Rn>,#0
16833 Not handled here: see NEG below. */
16834 /* SUBS <Rd>,<Rn>,#<imm3>
16835 SUBS <Rdn>,#<imm8>
16836 Not handled here: see PLUS above. */
16837 /* SUBS <Rd>,<Rn>,<Rm> */
16838 if (low_register_operand (op0, SImode)
16839 && low_register_operand (op1, SImode))
16840 action = CONV;
16841 break;
16843 case MULT:
16844 /* MULS <Rdm>,<Rn>,<Rdm>
16845 As an exception to the rule, this is only used
16846 when optimizing for size since MULS is slow on all
16847 known implementations. We do not even want to use
16848 MULS in cold code, if optimizing for speed, so we
16849 test the global flag here. */
16850 if (!optimize_size)
16851 break;
16852 /* else fall through. */
16853 case AND:
16854 case IOR:
16855 case XOR:
16856 /* ANDS <Rdn>,<Rm> */
16857 if (rtx_equal_p (dst, op0)
16858 && low_register_operand (op1, SImode))
16859 action = CONV;
16860 else if (rtx_equal_p (dst, op1)
16861 && low_register_operand (op0, SImode))
16862 action = SWAP_CONV;
16863 break;
16865 case ASHIFTRT:
16866 case ASHIFT:
16867 case LSHIFTRT:
16868 /* ASRS <Rdn>,<Rm> */
16869 /* LSRS <Rdn>,<Rm> */
16870 /* LSLS <Rdn>,<Rm> */
16871 if (rtx_equal_p (dst, op0)
16872 && low_register_operand (op1, SImode))
16873 action = CONV;
16874 /* ASRS <Rd>,<Rm>,#<imm5> */
16875 /* LSRS <Rd>,<Rm>,#<imm5> */
16876 /* LSLS <Rd>,<Rm>,#<imm5> */
16877 else if (low_register_operand (op0, SImode)
16878 && CONST_INT_P (op1)
16879 && IN_RANGE (INTVAL (op1), 0, 31))
16880 action = CONV;
16881 break;
16883 case ROTATERT:
16884 /* RORS <Rdn>,<Rm> */
16885 if (rtx_equal_p (dst, op0)
16886 && low_register_operand (op1, SImode))
16887 action = CONV;
16888 break;
16890 case NOT:
16891 case NEG:
16892 /* MVNS <Rd>,<Rm> */
16893 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16894 if (low_register_operand (op0, SImode))
16895 action = CONV;
16896 break;
16898 case CONST_INT:
16899 /* MOVS <Rd>,#<imm8> */
16900 if (CONST_INT_P (src)
16901 && IN_RANGE (INTVAL (src), 0, 255))
16902 action = CONV;
16903 break;
16905 case REG:
16906 /* MOVS and MOV<c> with registers have different
16907 encodings, so are not relevant here. */
16908 break;
16910 default:
16911 break;
16915 if (action != SKIP)
16917 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
16918 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
16919 rtvec vec;
16921 if (action == SWAP_CONV)
16923 src = copy_rtx (src);
16924 XEXP (src, 0) = op1;
16925 XEXP (src, 1) = op0;
16926 pat = gen_rtx_SET (VOIDmode, dst, src);
16927 vec = gen_rtvec (2, pat, clobber);
16929 else /* action == CONV */
16930 vec = gen_rtvec (2, pat, clobber);
16932 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
16933 INSN_CODE (insn) = -1;
16937 if (NONDEBUG_INSN_P (insn))
16938 df_simulate_one_insn_backwards (bb, insn, &live);
16942 CLEAR_REG_SET (&live);
16945 /* GCC puts the pool in the wrong place for ARM, since we can only
16946 load addresses within a limited distance of the pc. We do some
16947 special munging to move the constant pool values to the correct
16948 point in the code. */
16949 static void
16950 arm_reorg (void)
16952 rtx insn;
16953 HOST_WIDE_INT address = 0;
16954 Mfix * fix;
16956 if (TARGET_THUMB1)
16957 thumb1_reorg ();
16958 else if (TARGET_THUMB2)
16959 thumb2_reorg ();
16961 /* Ensure all insns that must be split have been split at this point.
16962 Otherwise, the pool placement code below may compute incorrect
16963 insn lengths. Note that when optimizing, all insns have already
16964 been split at this point. */
16965 if (!optimize)
16966 split_all_insns_noflow ();
16968 minipool_fix_head = minipool_fix_tail = NULL;
16970 /* The first insn must always be a note, or the code below won't
16971 scan it properly. */
16972 insn = get_insns ();
16973 gcc_assert (NOTE_P (insn));
16974 minipool_pad = 0;
16976 /* Scan all the insns and record the operands that will need fixing. */
16977 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
16979 if (BARRIER_P (insn))
16980 push_minipool_barrier (insn, address);
16981 else if (INSN_P (insn))
16983 rtx table;
16985 note_invalid_constants (insn, address, true);
16986 address += get_attr_length (insn);
16988 /* If the insn is a vector jump, add the size of the table
16989 and skip the table. */
16990 if (tablejump_p (insn, NULL, &table))
16992 address += get_jump_table_size (table);
16993 insn = table;
16996 else if (LABEL_P (insn))
16997 /* Add the worst-case padding due to alignment. We don't add
16998 the _current_ padding because the minipool insertions
16999 themselves might change it. */
17000 address += get_label_padding (insn);
17003 fix = minipool_fix_head;
17005 /* Now scan the fixups and perform the required changes. */
17006 while (fix)
17008 Mfix * ftmp;
17009 Mfix * fdel;
17010 Mfix * last_added_fix;
17011 Mfix * last_barrier = NULL;
17012 Mfix * this_fix;
17014 /* Skip any further barriers before the next fix. */
17015 while (fix && BARRIER_P (fix->insn))
17016 fix = fix->next;
17018 /* No more fixes. */
17019 if (fix == NULL)
17020 break;
17022 last_added_fix = NULL;
17024 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17026 if (BARRIER_P (ftmp->insn))
17028 if (ftmp->address >= minipool_vector_head->max_address)
17029 break;
17031 last_barrier = ftmp;
17033 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17034 break;
17036 last_added_fix = ftmp; /* Keep track of the last fix added. */
17039 /* If we found a barrier, drop back to that; any fixes that we
17040 could have reached but come after the barrier will now go in
17041 the next mini-pool. */
17042 if (last_barrier != NULL)
17044 /* Reduce the refcount for those fixes that won't go into this
17045 pool after all. */
17046 for (fdel = last_barrier->next;
17047 fdel && fdel != ftmp;
17048 fdel = fdel->next)
17050 fdel->minipool->refcount--;
17051 fdel->minipool = NULL;
17054 ftmp = last_barrier;
17056 else
17058 /* ftmp is the first fix that we can't fit into this pool and
17059 there are no natural barriers that we could use. Insert a
17060 new barrier in the code somewhere between the previous
17061 fix and this one, and arrange to jump around it. */
17062 HOST_WIDE_INT max_address;
17064 /* The last item on the list of fixes must be a barrier, so
17065 we can never run off the end of the list of fixes without
17066 last_barrier being set. */
17067 gcc_assert (ftmp);
17069 max_address = minipool_vector_head->max_address;
17070 /* Check that there isn't another fix that is in range that
17071 we couldn't fit into this pool because the pool was
17072 already too large: we need to put the pool before such an
17073 instruction. The pool itself may come just after the
17074 fix because create_fix_barrier also allows space for a
17075 jump instruction. */
17076 if (ftmp->address < max_address)
17077 max_address = ftmp->address + 1;
17079 last_barrier = create_fix_barrier (last_added_fix, max_address);
17082 assign_minipool_offsets (last_barrier);
17084 while (ftmp)
17086 if (!BARRIER_P (ftmp->insn)
17087 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17088 == NULL))
17089 break;
17091 ftmp = ftmp->next;
17094 /* Scan over the fixes we have identified for this pool, fixing them
17095 up and adding the constants to the pool itself. */
17096 for (this_fix = fix; this_fix && ftmp != this_fix;
17097 this_fix = this_fix->next)
17098 if (!BARRIER_P (this_fix->insn))
17100 rtx addr
17101 = plus_constant (Pmode,
17102 gen_rtx_LABEL_REF (VOIDmode,
17103 minipool_vector_label),
17104 this_fix->minipool->offset);
17105 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17108 dump_minipool (last_barrier->insn);
17109 fix = ftmp;
17112 /* From now on we must synthesize any constants that we can't handle
17113 directly. This can happen if the RTL gets split during final
17114 instruction generation. */
17115 after_arm_reorg = 1;
17117 /* Free the minipool memory. */
17118 obstack_free (&minipool_obstack, minipool_startobj);
17121 /* Routines to output assembly language. */
17123 /* If the rtx is the correct value then return the string of the number.
17124 In this way we can ensure that valid double constants are generated even
17125 when cross compiling. */
17126 const char *
17127 fp_immediate_constant (rtx x)
17129 REAL_VALUE_TYPE r;
17131 if (!fp_consts_inited)
17132 init_fp_table ();
17134 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17136 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17137 return "0";
17140 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17141 static const char *
17142 fp_const_from_val (REAL_VALUE_TYPE *r)
17144 if (!fp_consts_inited)
17145 init_fp_table ();
17147 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17148 return "0";
17151 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17152 OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17153 is in the list, UPDATE is true iff the list contains an explicit
17154 update of the base register. */
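/* For example (illustrative): restoring r4, r5 and the return address with
   SP as the base register and writeback is emitted as

	pop	{r4, r5, pc}

   while the same registers restored from another base register, without an
   update, come out as something like

	ldmia	r7, {r4, r5, pc}  */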
17155 void
17156 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17157 bool update)
17159 int i;
17160 char pattern[100];
17161 int offset;
17162 const char *conditional;
17163 int num_saves = XVECLEN (operands[0], 0);
17164 unsigned int regno;
17165 unsigned int regno_base = REGNO (operands[1]);
17167 offset = 0;
17168 offset += update ? 1 : 0;
17169 offset += return_pc ? 1 : 0;
17171 /* Is the base register in the list? */
17172 for (i = offset; i < num_saves; i++)
17174 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17175 /* If SP is in the list, then the base register must be SP. */
17176 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17177 /* If the base register is in the list, there must be no explicit update. */
17178 if (regno == regno_base)
17179 gcc_assert (!update);
17182 conditional = reverse ? "%?%D0" : "%?%d0";
17183 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17185 /* Output pop (not ldmfd) because it has a shorter encoding. */
17186 gcc_assert (update);
17187 sprintf (pattern, "pop%s\t{", conditional);
17189 else
17191 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17192 It's just a convention; their semantics are identical. */
17193 if (regno_base == SP_REGNUM)
17194 sprintf (pattern, "ldm%sfd\t", conditional);
17195 else if (TARGET_UNIFIED_ASM)
17196 sprintf (pattern, "ldmia%s\t", conditional);
17197 else
17198 sprintf (pattern, "ldm%sia\t", conditional);
17200 strcat (pattern, reg_names[regno_base]);
17201 if (update)
17202 strcat (pattern, "!, {");
17203 else
17204 strcat (pattern, ", {");
17207 /* Output the first destination register. */
17208 strcat (pattern,
17209 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17211 /* Output the rest of the destination registers. */
17212 for (i = offset + 1; i < num_saves; i++)
17214 strcat (pattern, ", ");
17215 strcat (pattern,
17216 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17219 strcat (pattern, "}");
17221 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17222 strcat (pattern, "^");
17224 output_asm_insn (pattern, &cond);
17228 /* Output the assembly for a store multiple. */
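/* For example (illustrative), a push of d8-d10 described by OPERANDS is
   emitted as something like

	fstmfdd	sp!, {d8, d9, d10}  */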
17230 const char *
17231 vfp_output_fstmd (rtx * operands)
17233 char pattern[100];
17234 int p;
17235 int base;
17236 int i;
17238 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17239 p = strlen (pattern);
17241 gcc_assert (REG_P (operands[1]));
17243 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17244 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17246 p += sprintf (&pattern[p], ", d%d", base + i);
17248 strcpy (&pattern[p], "}");
17250 output_asm_insn (pattern, operands);
17251 return "";
17255 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17256 number of bytes pushed. */
17258 static int
17259 vfp_emit_fstmd (int base_reg, int count)
17261 rtx par;
17262 rtx dwarf;
17263 rtx tmp, reg;
17264 int i;
17266 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17267 register pairs are stored by a store multiple insn. We avoid this
17268 by pushing an extra pair. */
17269 if (count == 2 && !arm_arch6)
17271 if (base_reg == LAST_VFP_REGNUM - 3)
17272 base_reg -= 2;
17273 count++;
17276 /* FSTMD may not store more than 16 doubleword registers at once. Split
17277 larger stores into multiple parts (up to a maximum of two, in
17278 practice). */
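/* For example, a request to push 20 D registers is emitted as one FSTMD
   of the top 4 registers followed by one FSTMD of the remaining 16, via
   the recursive calls below.  */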
17279 if (count > 16)
17281 int saved;
17282 /* NOTE: base_reg is an internal register number, so each D register
17283 counts as 2. */
17284 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17285 saved += vfp_emit_fstmd (base_reg, 16);
17286 return saved;
17289 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17290 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17292 reg = gen_rtx_REG (DFmode, base_reg);
17293 base_reg += 2;
17295 XVECEXP (par, 0, 0)
17296 = gen_rtx_SET (VOIDmode,
17297 gen_frame_mem
17298 (BLKmode,
17299 gen_rtx_PRE_MODIFY (Pmode,
17300 stack_pointer_rtx,
17301 plus_constant
17302 (Pmode, stack_pointer_rtx,
17303 - (count * 8)))
17305 gen_rtx_UNSPEC (BLKmode,
17306 gen_rtvec (1, reg),
17307 UNSPEC_PUSH_MULT));
17309 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17310 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17311 RTX_FRAME_RELATED_P (tmp) = 1;
17312 XVECEXP (dwarf, 0, 0) = tmp;
17314 tmp = gen_rtx_SET (VOIDmode,
17315 gen_frame_mem (DFmode, stack_pointer_rtx),
17316 reg);
17317 RTX_FRAME_RELATED_P (tmp) = 1;
17318 XVECEXP (dwarf, 0, 1) = tmp;
17320 for (i = 1; i < count; i++)
17322 reg = gen_rtx_REG (DFmode, base_reg);
17323 base_reg += 2;
17324 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17326 tmp = gen_rtx_SET (VOIDmode,
17327 gen_frame_mem (DFmode,
17328 plus_constant (Pmode,
17329 stack_pointer_rtx,
17330 i * 8)),
17331 reg);
17332 RTX_FRAME_RELATED_P (tmp) = 1;
17333 XVECEXP (dwarf, 0, i + 1) = tmp;
17336 par = emit_insn (par);
17337 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17338 RTX_FRAME_RELATED_P (par) = 1;
17340 return count * 8;
17343 /* Emit a call instruction with pattern PAT. ADDR is the address of
17344 the call target. */
17346 void
17347 arm_emit_call_insn (rtx pat, rtx addr)
17349 rtx insn;
17351 insn = emit_call_insn (pat);
17353 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17354 If the call might use such an entry, add a use of the PIC register
17355 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17356 if (TARGET_VXWORKS_RTP
17357 && flag_pic
17358 && GET_CODE (addr) == SYMBOL_REF
17359 && (SYMBOL_REF_DECL (addr)
17360 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17361 : !SYMBOL_REF_LOCAL_P (addr)))
17363 require_pic_register ();
17364 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17368 /* Output a 'call' insn. */
17369 const char *
17370 output_call (rtx *operands)
17372 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17374 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17375 if (REGNO (operands[0]) == LR_REGNUM)
17377 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17378 output_asm_insn ("mov%?\t%0, %|lr", operands);
17381 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17383 if (TARGET_INTERWORK || arm_arch4t)
17384 output_asm_insn ("bx%?\t%0", operands);
17385 else
17386 output_asm_insn ("mov%?\t%|pc, %0", operands);
17388 return "";
17391 /* Output a 'call' insn that is a reference in memory. This is
17392 disabled for ARMv5 and later, where we prefer a blx instead, because
17393 otherwise there's a significant performance overhead. */
17394 const char *
17395 output_call_mem (rtx *operands)
17397 gcc_assert (!arm_arch5);
17398 if (TARGET_INTERWORK)
17400 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17401 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17402 output_asm_insn ("bx%?\t%|ip", operands);
17404 else if (regno_use_in (LR_REGNUM, operands[0]))
17406 /* LR is used in the memory address. We load the address in the
17407 first instruction. It's safe to use IP as the target of the
17408 load since the call will kill it anyway. */
17409 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17410 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17411 if (arm_arch4t)
17412 output_asm_insn ("bx%?\t%|ip", operands);
17413 else
17414 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17416 else
17418 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17419 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17422 return "";
17426 /* Output a move from arm registers to arm registers of a long double.
17427 OPERANDS[0] is the destination.
17428 OPERANDS[1] is the source. */
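/* For example (illustrative): moving r1-r3 into r0-r2 copies forwards
   (mov r0, r1; mov r1, r2; mov r2, r3), while moving r0-r2 into r1-r3
   copies backwards, so that no source register is overwritten before it
   has been read.  */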
17429 const char *
17430 output_mov_long_double_arm_from_arm (rtx *operands)
17432 /* We have to be careful here because the two might overlap. */
17433 int dest_start = REGNO (operands[0]);
17434 int src_start = REGNO (operands[1]);
17435 rtx ops[2];
17436 int i;
17438 if (dest_start < src_start)
17440 for (i = 0; i < 3; i++)
17442 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17443 ops[1] = gen_rtx_REG (SImode, src_start + i);
17444 output_asm_insn ("mov%?\t%0, %1", ops);
17447 else
17449 for (i = 2; i >= 0; i--)
17451 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17452 ops[1] = gen_rtx_REG (SImode, src_start + i);
17453 output_asm_insn ("mov%?\t%0, %1", ops);
17457 return "";
17460 void
17461 arm_emit_movpair (rtx dest, rtx src)
17463 /* If the src is an immediate, simplify it. */
17464 if (CONST_INT_P (src))
17466 HOST_WIDE_INT val = INTVAL (src);
17467 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17468 if ((val >> 16) & 0x0000ffff)
17469 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17470 GEN_INT (16)),
17471 GEN_INT ((val >> 16) & 0x0000ffff));
17472 return;
17474 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17475 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17478 /* Output a move between double words. It must be REG<-MEM
17479 or MEM<-REG. */
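/* Editorial note: when EMIT is false nothing is output; the function only
   stores in *COUNT the number of instructions the move would take, which
   callers can use when computing instruction lengths.  */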
17480 const char *
17481 output_move_double (rtx *operands, bool emit, int *count)
17483 enum rtx_code code0 = GET_CODE (operands[0]);
17484 enum rtx_code code1 = GET_CODE (operands[1]);
17485 rtx otherops[3];
17486 if (count)
17487 *count = 1;
17489 /* The only case when this might happen is when
17490 you are looking at the length of a DImode instruction
17491 that has an invalid constant in it. */
17492 if (code0 == REG && code1 != MEM)
17494 gcc_assert (!emit);
17495 *count = 2;
17496 return "";
17499 if (code0 == REG)
17501 unsigned int reg0 = REGNO (operands[0]);
17503 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17505 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17507 switch (GET_CODE (XEXP (operands[1], 0)))
17509 case REG:
17511 if (emit)
17513 if (TARGET_LDRD
17514 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17515 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17516 else
17517 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17519 break;
17521 case PRE_INC:
17522 gcc_assert (TARGET_LDRD);
17523 if (emit)
17524 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17525 break;
17527 case PRE_DEC:
17528 if (emit)
17530 if (TARGET_LDRD)
17531 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17532 else
17533 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17535 break;
17537 case POST_INC:
17538 if (emit)
17540 if (TARGET_LDRD)
17541 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17542 else
17543 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17545 break;
17547 case POST_DEC:
17548 gcc_assert (TARGET_LDRD);
17549 if (emit)
17550 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17551 break;
17553 case PRE_MODIFY:
17554 case POST_MODIFY:
17555 /* Auto-increment addressing modes should never have overlapping
17556 base and destination registers, and overlapping index registers
17557 are already prohibited, so this doesn't need to worry about
17558 fix_cm3_ldrd. */
17559 otherops[0] = operands[0];
17560 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17561 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17563 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17565 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17567 /* Registers overlap so split out the increment. */
17568 if (emit)
17570 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17571 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17573 if (count)
17574 *count = 2;
17576 else
17578 /* Use a single insn if we can.
17579 FIXME: IWMMXT allows offsets larger than ldrd can
17580 handle, fix these up with a pair of ldr. */
17581 if (TARGET_THUMB2
17582 || !CONST_INT_P (otherops[2])
17583 || (INTVAL (otherops[2]) > -256
17584 && INTVAL (otherops[2]) < 256))
17586 if (emit)
17587 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17589 else
17591 if (emit)
17593 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17594 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17596 if (count)
17597 *count = 2;
17602 else
17604 /* Use a single insn if we can.
17605 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17606 fix these up with a pair of ldr. */
17607 if (TARGET_THUMB2
17608 || !CONST_INT_P (otherops[2])
17609 || (INTVAL (otherops[2]) > -256
17610 && INTVAL (otherops[2]) < 256))
17612 if (emit)
17613 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17615 else
17617 if (emit)
17619 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17620 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17622 if (count)
17623 *count = 2;
17626 break;
17628 case LABEL_REF:
17629 case CONST:
17630 /* We might be able to use ldrd %0, %1 here. However the range is
17631 different to ldr/adr, and it is broken on some ARMv7-M
17632 implementations. */
17633 /* Use the second register of the pair to avoid problematic
17634 overlap. */
17635 otherops[1] = operands[1];
17636 if (emit)
17637 output_asm_insn ("adr%?\t%0, %1", otherops);
17638 operands[1] = otherops[0];
17639 if (emit)
17641 if (TARGET_LDRD)
17642 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17643 else
17644 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17647 if (count)
17648 *count = 2;
17649 break;
17651 /* ??? This needs checking for thumb2. */
17652 default:
17653 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17654 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17656 otherops[0] = operands[0];
17657 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17658 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17660 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17662 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17664 switch ((int) INTVAL (otherops[2]))
17666 case -8:
17667 if (emit)
17668 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17669 return "";
17670 case -4:
17671 if (TARGET_THUMB2)
17672 break;
17673 if (emit)
17674 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17675 return "";
17676 case 4:
17677 if (TARGET_THUMB2)
17678 break;
17679 if (emit)
17680 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17681 return "";
17684 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17685 operands[1] = otherops[0];
17686 if (TARGET_LDRD
17687 && (REG_P (otherops[2])
17688 || TARGET_THUMB2
17689 || (CONST_INT_P (otherops[2])
17690 && INTVAL (otherops[2]) > -256
17691 && INTVAL (otherops[2]) < 256)))
17693 if (reg_overlap_mentioned_p (operands[0],
17694 otherops[2]))
17696 rtx tmp;
17697 /* Swap base and index registers over to
17698 avoid a conflict. */
17699 tmp = otherops[1];
17700 otherops[1] = otherops[2];
17701 otherops[2] = tmp;
17703 /* If both registers conflict, it will usually
17704 have been fixed by a splitter. */
17705 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17706 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17708 if (emit)
17710 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17711 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17713 if (count)
17714 *count = 2;
17716 else
17718 otherops[0] = operands[0];
17719 if (emit)
17720 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17722 return "";
17725 if (CONST_INT_P (otherops[2]))
17727 if (emit)
17729 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17730 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17731 else
17732 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17735 else
17737 if (emit)
17738 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17741 else
17743 if (emit)
17744 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
17747 if (count)
17748 *count = 2;
17750 if (TARGET_LDRD)
17751 return "ldr%(d%)\t%0, [%1]";
17753 return "ldm%(ia%)\t%1, %M0";
17755 else
17757 otherops[1] = adjust_address (operands[1], SImode, 4);
17758 /* Take care of overlapping base/data reg. */
17759 if (reg_mentioned_p (operands[0], operands[1]))
17761 if (emit)
17763 output_asm_insn ("ldr%?\t%0, %1", otherops);
17764 output_asm_insn ("ldr%?\t%0, %1", operands);
17766 if (count)
17767 *count = 2;
17770 else
17772 if (emit)
17774 output_asm_insn ("ldr%?\t%0, %1", operands);
17775 output_asm_insn ("ldr%?\t%0, %1", otherops);
17777 if (count)
17778 *count = 2;
17783 else
17785 /* Constraints should ensure this. */
17786 gcc_assert (code0 == MEM && code1 == REG);
17787 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
17788 || (TARGET_ARM && TARGET_LDRD));
17790 switch (GET_CODE (XEXP (operands[0], 0)))
17792 case REG:
17793 if (emit)
17795 if (TARGET_LDRD)
17796 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
17797 else
17798 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17800 break;
17802 case PRE_INC:
17803 gcc_assert (TARGET_LDRD);
17804 if (emit)
17805 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
17806 break;
17808 case PRE_DEC:
17809 if (emit)
17811 if (TARGET_LDRD)
17812 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
17813 else
17814 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
17816 break;
17818 case POST_INC:
17819 if (emit)
17821 if (TARGET_LDRD)
17822 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
17823 else
17824 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
17826 break;
17828 case POST_DEC:
17829 gcc_assert (TARGET_LDRD);
17830 if (emit)
17831 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
17832 break;
17834 case PRE_MODIFY:
17835 case POST_MODIFY:
17836 otherops[0] = operands[1];
17837 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
17838 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
17840 /* IWMMXT allows offsets larger than ldrd can handle,
17841 fix these up with a pair of ldr. */
17842 if (!TARGET_THUMB2
17843 && CONST_INT_P (otherops[2])
17844 && (INTVAL(otherops[2]) <= -256
17845 || INTVAL(otherops[2]) >= 256))
17847 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17849 if (emit)
17851 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
17852 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17854 if (count)
17855 *count = 2;
17857 else
17859 if (emit)
17861 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17862 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
17864 if (count)
17865 *count = 2;
17868 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17870 if (emit)
17871 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
17873 else
17875 if (emit)
17876 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
17878 break;
17880 case PLUS:
17881 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
17882 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17884 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
17886 case -8:
17887 if (emit)
17888 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
17889 return "";
17891 case -4:
17892 if (TARGET_THUMB2)
17893 break;
17894 if (emit)
17895 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
17896 return "";
17898 case 4:
17899 if (TARGET_THUMB2)
17900 break;
17901 if (emit)
17902 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
17903 return "";
17906 if (TARGET_LDRD
17907 && (REG_P (otherops[2])
17908 || TARGET_THUMB2
17909 || (CONST_INT_P (otherops[2])
17910 && INTVAL (otherops[2]) > -256
17911 && INTVAL (otherops[2]) < 256)))
17913 otherops[0] = operands[1];
17914 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
17915 if (emit)
17916 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
17917 return "";
17919 /* Fall through */
17921 default:
17922 otherops[0] = adjust_address (operands[0], SImode, 4);
17923 otherops[1] = operands[1];
17924 if (emit)
17926 output_asm_insn ("str%?\t%1, %0", operands);
17927 output_asm_insn ("str%?\t%H1, %0", otherops);
17929 if (count)
17930 *count = 2;
17934 return "";
17937 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17938 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17940 const char *
17941 output_move_quad (rtx *operands)
17943 if (REG_P (operands[0]))
17945 /* Load, or reg->reg move. */
17947 if (MEM_P (operands[1]))
17949 switch (GET_CODE (XEXP (operands[1], 0)))
17951 case REG:
17952 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17953 break;
17955 case LABEL_REF:
17956 case CONST:
17957 output_asm_insn ("adr%?\t%0, %1", operands);
17958 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
17959 break;
17961 default:
17962 gcc_unreachable ();
17965 else
17967 rtx ops[2];
17968 int dest, src, i;
17970 gcc_assert (REG_P (operands[1]));
17972 dest = REGNO (operands[0]);
17973 src = REGNO (operands[1]);
17975 /* This seems pretty dumb, but hopefully GCC won't try to do it
17976 very often. */
17977 if (dest < src)
17978 for (i = 0; i < 4; i++)
17980 ops[0] = gen_rtx_REG (SImode, dest + i);
17981 ops[1] = gen_rtx_REG (SImode, src + i);
17982 output_asm_insn ("mov%?\t%0, %1", ops);
17984 else
17985 for (i = 3; i >= 0; i--)
17987 ops[0] = gen_rtx_REG (SImode, dest + i);
17988 ops[1] = gen_rtx_REG (SImode, src + i);
17989 output_asm_insn ("mov%?\t%0, %1", ops);
17993 else
17995 gcc_assert (MEM_P (operands[0]));
17996 gcc_assert (REG_P (operands[1]));
17997 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
17999 switch (GET_CODE (XEXP (operands[0], 0)))
18001 case REG:
18002 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18003 break;
18005 default:
18006 gcc_unreachable ();
18010 return "";
18013 /* Output a VFP load or store instruction. */
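/* Illustrative example (editorial, pre-UAL VFP syntax assumed): a
   double-precision load from a plain register address comes out roughly as
   "fldd d8, [r0]", while a PRE_DEC store becomes "fstmdbd sp!, {d8}".  */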
18015 const char *
18016 output_move_vfp (rtx *operands)
18018 rtx reg, mem, addr, ops[2];
18019 int load = REG_P (operands[0]);
18020 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18021 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18022 const char *templ;
18023 char buff[50];
18024 enum machine_mode mode;
18026 reg = operands[!load];
18027 mem = operands[load];
18029 mode = GET_MODE (reg);
18031 gcc_assert (REG_P (reg));
18032 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18033 gcc_assert (mode == SFmode
18034 || mode == DFmode
18035 || mode == SImode
18036 || mode == DImode
18037 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18038 gcc_assert (MEM_P (mem));
18040 addr = XEXP (mem, 0);
18042 switch (GET_CODE (addr))
18044 case PRE_DEC:
18045 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18046 ops[0] = XEXP (addr, 0);
18047 ops[1] = reg;
18048 break;
18050 case POST_INC:
18051 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18052 ops[0] = XEXP (addr, 0);
18053 ops[1] = reg;
18054 break;
18056 default:
18057 templ = "f%s%c%%?\t%%%s0, %%1%s";
18058 ops[0] = reg;
18059 ops[1] = mem;
18060 break;
18063 sprintf (buff, templ,
18064 load ? "ld" : "st",
18065 dp ? 'd' : 's',
18066 dp ? "P" : "",
18067 integer_p ? "\t%@ int" : "");
18068 output_asm_insn (buff, ops);
18070 return "";
18073 /* Output a Neon double-word or quad-word load or store, or a load
18074 or store for larger structure modes.
18076 WARNING: The ordering of elements is weird in big-endian mode,
18077 because the EABI requires that vectors stored in memory appear
18078 as though they were stored by a VSTM instruction.
18079 GCC RTL defines element ordering based on in-memory order.
18080 This can be different from the architectural ordering of elements
18081 within a NEON register. The intrinsics defined in arm_neon.h use the
18082 NEON register element ordering, not the GCC RTL element ordering.
18084 For example, the in-memory ordering of a big-endian quadword
18085 vector with 16-bit elements when stored from register pair {d0,d1}
18086 will be (lowest address first, d0[N] is NEON register element N):
18088 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18090 When necessary, quadword registers (dN, dN+1) are moved to ARM
18091 registers from rN in the order:
18093 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18095 So that STM/LDM can be used on vectors in ARM registers, and the
18096 same memory layout will result as if VSTM/VLDM were used.
18098 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18099 possible, which allows use of appropriate alignment tags.
18100 Note that the choice of "64" is independent of the actual vector
18101 element size; this size simply ensures that the behavior is
18102 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18104 Due to limitations of those instructions, use of VST1.64/VLD1.64
18105 is not possible if:
18106 - the address contains PRE_DEC, or
18107 - the mode refers to more than 4 double-word registers
18109 In those cases, it would be possible to replace VSTM/VLDM by a
18110 sequence of instructions; this is not currently implemented since
18111 this is not certain to actually improve performance. */
18113 const char *
18114 output_move_neon (rtx *operands)
18116 rtx reg, mem, addr, ops[2];
18117 int regno, nregs, load = REG_P (operands[0]);
18118 const char *templ;
18119 char buff[50];
18120 enum machine_mode mode;
18122 reg = operands[!load];
18123 mem = operands[load];
18125 mode = GET_MODE (reg);
18127 gcc_assert (REG_P (reg));
18128 regno = REGNO (reg);
18129 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18130 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18131 || NEON_REGNO_OK_FOR_QUAD (regno));
18132 gcc_assert (VALID_NEON_DREG_MODE (mode)
18133 || VALID_NEON_QREG_MODE (mode)
18134 || VALID_NEON_STRUCT_MODE (mode));
18135 gcc_assert (MEM_P (mem));
18137 addr = XEXP (mem, 0);
18139 /* Strip off const from addresses like (const (plus (...))). */
18140 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18141 addr = XEXP (addr, 0);
18143 switch (GET_CODE (addr))
18145 case POST_INC:
18146 /* We have to use vldm / vstm for too-large modes. */
18147 if (nregs > 4)
18149 templ = "v%smia%%?\t%%0!, %%h1";
18150 ops[0] = XEXP (addr, 0);
18152 else
18154 templ = "v%s1.64\t%%h1, %%A0";
18155 ops[0] = mem;
18157 ops[1] = reg;
18158 break;
18160 case PRE_DEC:
18161 /* We have to use vldm / vstm in this case, since there is no
18162 pre-decrement form of the vld1 / vst1 instructions. */
18163 templ = "v%smdb%%?\t%%0!, %%h1";
18164 ops[0] = XEXP (addr, 0);
18165 ops[1] = reg;
18166 break;
18168 case POST_MODIFY:
18169 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18170 gcc_unreachable ();
18172 case LABEL_REF:
18173 case PLUS:
18175 int i;
18176 int overlap = -1;
18177 for (i = 0; i < nregs; i++)
18179 /* We're only using DImode here because it's a convenient size. */
18180 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18181 ops[1] = adjust_address (mem, DImode, 8 * i);
18182 if (reg_overlap_mentioned_p (ops[0], mem))
18184 gcc_assert (overlap == -1);
18185 overlap = i;
18187 else
18189 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18190 output_asm_insn (buff, ops);
18193 if (overlap != -1)
18195 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18196 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18197 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18198 output_asm_insn (buff, ops);
18201 return "";
18204 default:
18205 /* We have to use vldm / vstm for too-large modes. */
18206 if (nregs > 4)
18207 templ = "v%smia%%?\t%%m0, %%h1";
18208 else
18209 templ = "v%s1.64\t%%h1, %%A0";
18211 ops[0] = mem;
18212 ops[1] = reg;
18215 sprintf (buff, templ, load ? "ld" : "st");
18216 output_asm_insn (buff, ops);
18218 return "";
18221 /* Compute and return the length of neon_mov<mode>, where <mode> is
18222 one of VSTRUCT modes: EI, OI, CI or XI. */
18224 arm_attr_length_move_neon (rtx insn)
18226 rtx reg, mem, addr;
18227 int load;
18228 enum machine_mode mode;
18230 extract_insn_cached (insn);
18232 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18234 mode = GET_MODE (recog_data.operand[0]);
18235 switch (mode)
18237 case EImode:
18238 case OImode:
18239 return 8;
18240 case CImode:
18241 return 12;
18242 case XImode:
18243 return 16;
18244 default:
18245 gcc_unreachable ();
18249 load = REG_P (recog_data.operand[0]);
18250 reg = recog_data.operand[!load];
18251 mem = recog_data.operand[load];
18253 gcc_assert (MEM_P (mem));
18255 mode = GET_MODE (reg);
18256 addr = XEXP (mem, 0);
18258 /* Strip off const from addresses like (const (plus (...))). */
18259 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18260 addr = XEXP (addr, 0);
18262 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18264 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18265 return insns * 4;
18267 else
18268 return 4;
18271 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18272 return zero. */
18275 arm_address_offset_is_imm (rtx insn)
18277 rtx mem, addr;
18279 extract_insn_cached (insn);
18281 if (REG_P (recog_data.operand[0]))
18282 return 0;
18284 mem = recog_data.operand[0];
18286 gcc_assert (MEM_P (mem));
18288 addr = XEXP (mem, 0);
18290 if (REG_P (addr)
18291 || (GET_CODE (addr) == PLUS
18292 && REG_P (XEXP (addr, 0))
18293 && CONST_INT_P (XEXP (addr, 1))))
18294 return 1;
18295 else
18296 return 0;
18299 /* Output an ADD r, s, #n where n may be too big for one instruction.
18300 If adding zero to one register, output nothing. */
18301 const char *
18302 output_add_immediate (rtx *operands)
18304 HOST_WIDE_INT n = INTVAL (operands[2]);
18306 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18308 if (n < 0)
18309 output_multi_immediate (operands,
18310 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18311 -n);
18312 else
18313 output_multi_immediate (operands,
18314 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18318 return "";
18321 /* Output a multiple immediate operation.
18322 OPERANDS is the vector of operands referred to in the output patterns.
18323 INSTR1 is the output pattern to use for the first constant.
18324 INSTR2 is the output pattern to use for subsequent constants.
18325 IMMED_OP is the index of the constant slot in OPERANDS.
18326 N is the constant value. */
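/* Worked example (editorial): for N = 0x00ffff00 the loop below finds two
   byte-sized chunks at even rotations, so it emits INSTR1 with #0xff00
   followed by INSTR2 with #0xff0000.  */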
18327 static const char *
18328 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18329 int immed_op, HOST_WIDE_INT n)
18331 #if HOST_BITS_PER_WIDE_INT > 32
18332 n &= 0xffffffff;
18333 #endif
18335 if (n == 0)
18337 /* Quick and easy output. */
18338 operands[immed_op] = const0_rtx;
18339 output_asm_insn (instr1, operands);
18341 else
18343 int i;
18344 const char * instr = instr1;
18346 /* Note that n is never zero here (which would give no output). */
18347 for (i = 0; i < 32; i += 2)
18349 if (n & (3 << i))
18351 operands[immed_op] = GEN_INT (n & (255 << i));
18352 output_asm_insn (instr, operands);
18353 instr = instr2;
18354 i += 6;
18359 return "";
18362 /* Return the name of a shifter operation. */
18363 static const char *
18364 arm_shift_nmem(enum rtx_code code)
18366 switch (code)
18368 case ASHIFT:
18369 return ARM_LSL_NAME;
18371 case ASHIFTRT:
18372 return "asr";
18374 case LSHIFTRT:
18375 return "lsr";
18377 case ROTATERT:
18378 return "ror";
18380 default:
18381 abort();
18385 /* Return the appropriate ARM instruction for the operation code.
18386 The returned result should not be overwritten. OP is the rtx of the
18387 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18388 was shifted. */
18389 const char *
18390 arithmetic_instr (rtx op, int shift_first_arg)
18392 switch (GET_CODE (op))
18394 case PLUS:
18395 return "add";
18397 case MINUS:
18398 return shift_first_arg ? "rsb" : "sub";
18400 case IOR:
18401 return "orr";
18403 case XOR:
18404 return "eor";
18406 case AND:
18407 return "and";
18409 case ASHIFT:
18410 case ASHIFTRT:
18411 case LSHIFTRT:
18412 case ROTATERT:
18413 return arm_shift_nmem(GET_CODE(op));
18415 default:
18416 gcc_unreachable ();
18420 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18421 for the operation code. The returned result should not be overwritten.
18422 OP is the rtx code of the shift.
18423 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18424 constant shift amount otherwise. */
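/* Illustrative examples (editorial): an ASHIFT by the constant 3 returns
   ARM_LSL_NAME with *AMOUNTP = 3; a MULT by the power of two 8 is folded to
   the same result; a shift by a register returns the mnemonic with
   *AMOUNTP = -1.  */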
18425 static const char *
18426 shift_op (rtx op, HOST_WIDE_INT *amountp)
18428 const char * mnem;
18429 enum rtx_code code = GET_CODE (op);
18431 switch (code)
18433 case ROTATE:
18434 if (!CONST_INT_P (XEXP (op, 1)))
18436 output_operand_lossage ("invalid shift operand");
18437 return NULL;
18440 code = ROTATERT;
18441 *amountp = 32 - INTVAL (XEXP (op, 1));
18442 mnem = "ror";
18443 break;
18445 case ASHIFT:
18446 case ASHIFTRT:
18447 case LSHIFTRT:
18448 case ROTATERT:
18449 mnem = arm_shift_nmem(code);
18450 if (CONST_INT_P (XEXP (op, 1)))
18452 *amountp = INTVAL (XEXP (op, 1));
18454 else if (REG_P (XEXP (op, 1)))
18456 *amountp = -1;
18457 return mnem;
18459 else
18461 output_operand_lossage ("invalid shift operand");
18462 return NULL;
18464 break;
18466 case MULT:
18467 /* We never have to worry about the amount being other than a
18468 power of 2, since this case can never be reloaded from a reg. */
18469 if (!CONST_INT_P (XEXP (op, 1)))
18471 output_operand_lossage ("invalid shift operand");
18472 return NULL;
18475 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18477 /* Amount must be a power of two. */
18478 if (*amountp & (*amountp - 1))
18480 output_operand_lossage ("invalid shift operand");
18481 return NULL;
18484 *amountp = int_log2 (*amountp);
18485 return ARM_LSL_NAME;
18487 default:
18488 output_operand_lossage ("invalid shift operand");
18489 return NULL;
18492 /* This is not 100% correct, but follows from the desire to merge
18493 multiplication by a power of 2 with the recognizer for a
18494 shift. >=32 is not a valid shift for "lsl", so we must try and
18495 output a shift that produces the correct arithmetical result.
18496 Using lsr #32 is identical except for the fact that the carry bit
18497 is not set correctly if we set the flags; but we never use the
18498 carry bit from such an operation, so we can ignore that. */
18499 if (code == ROTATERT)
18500 /* Rotate is just modulo 32. */
18501 *amountp &= 31;
18502 else if (*amountp != (*amountp & 31))
18504 if (code == ASHIFT)
18505 mnem = "lsr";
18506 *amountp = 32;
18509 /* Shifts of 0 are no-ops. */
18510 if (*amountp == 0)
18511 return NULL;
18513 return mnem;
18516 /* Obtain the shift count corresponding to the power of two POWER. */
18518 static HOST_WIDE_INT
18519 int_log2 (HOST_WIDE_INT power)
18521 HOST_WIDE_INT shift = 0;
18523 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18525 gcc_assert (shift <= 31);
18526 shift++;
18529 return shift;
18532 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18533 because /bin/as is horribly restrictive. The judgement about
18534 whether or not each character is 'printable' (and can be output as
18535 is) or not (and must be printed with an octal escape) must be made
18536 with reference to the *host* character set -- the situation is
18537 similar to that discussed in the comments above pp_c_char in
18538 c-pretty-print.c. */
18540 #define MAX_ASCII_LEN 51
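/* Illustrative example (editorial): the three bytes 'H', 'i', '\n' would be
   emitted as
	.ascii	"Hi\012"
   since the newline is not printable and therefore gets an octal escape.  */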
18542 void
18543 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18545 int i;
18546 int len_so_far = 0;
18548 fputs ("\t.ascii\t\"", stream);
18550 for (i = 0; i < len; i++)
18552 int c = p[i];
18554 if (len_so_far >= MAX_ASCII_LEN)
18556 fputs ("\"\n\t.ascii\t\"", stream);
18557 len_so_far = 0;
18560 if (ISPRINT (c))
18562 if (c == '\\' || c == '\"')
18564 putc ('\\', stream);
18565 len_so_far++;
18567 putc (c, stream);
18568 len_so_far++;
18570 else
18572 fprintf (stream, "\\%03o", c);
18573 len_so_far += 4;
18577 fputs ("\"\n", stream);
18580 /* Compute the register save mask for registers 0 through 12
18581 inclusive. This code is used by arm_compute_save_reg_mask. */
18583 static unsigned long
18584 arm_compute_save_reg0_reg12_mask (void)
18586 unsigned long func_type = arm_current_func_type ();
18587 unsigned long save_reg_mask = 0;
18588 unsigned int reg;
18590 if (IS_INTERRUPT (func_type))
18592 unsigned int max_reg;
18593 /* Interrupt functions must not corrupt any registers,
18594 even call clobbered ones. If this is a leaf function
18595 we can just examine the registers used by the RTL, but
18596 otherwise we have to assume that whatever function is
18597 called might clobber anything, and so we have to save
18598 all the call-clobbered registers as well. */
18599 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18600 /* FIQ handlers have registers r8 - r12 banked, so
18601 we only need to check r0 - r7. Normal ISRs only
18602 bank r14 and r15, so we must check up to r12.
18603 r13 is the stack pointer which is always preserved,
18604 so we do not need to consider it here. */
18605 max_reg = 7;
18606 else
18607 max_reg = 12;
18609 for (reg = 0; reg <= max_reg; reg++)
18610 if (df_regs_ever_live_p (reg)
18611 || (! crtl->is_leaf && call_used_regs[reg]))
18612 save_reg_mask |= (1 << reg);
18614 /* Also save the pic base register if necessary. */
18615 if (flag_pic
18616 && !TARGET_SINGLE_PIC_BASE
18617 && arm_pic_register != INVALID_REGNUM
18618 && crtl->uses_pic_offset_table)
18619 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18621 else if (IS_VOLATILE(func_type))
18623 /* For noreturn functions we historically omitted register saves
18624 altogether. However, this really messes up debugging. As a
18625 compromise, save just the frame pointers. Combined with the link
18626 register saved elsewhere this should be sufficient to get
18627 a backtrace. */
18628 if (frame_pointer_needed)
18629 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18630 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18631 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18632 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18633 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18635 else
18637 /* In the normal case we only need to save those registers
18638 which are call saved and which are used by this function. */
18639 for (reg = 0; reg <= 11; reg++)
18640 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18641 save_reg_mask |= (1 << reg);
18643 /* Handle the frame pointer as a special case. */
18644 if (frame_pointer_needed)
18645 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18647 /* If we aren't loading the PIC register,
18648 don't stack it even though it may be live. */
18649 if (flag_pic
18650 && !TARGET_SINGLE_PIC_BASE
18651 && arm_pic_register != INVALID_REGNUM
18652 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18653 || crtl->uses_pic_offset_table))
18654 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18656 /* The prologue will copy SP into R0, so save it. */
18657 if (IS_STACKALIGN (func_type))
18658 save_reg_mask |= 1;
18661 /* Save registers so the exception handler can modify them. */
18662 if (crtl->calls_eh_return)
18664 unsigned int i;
18666 for (i = 0; ; i++)
18668 reg = EH_RETURN_DATA_REGNO (i);
18669 if (reg == INVALID_REGNUM)
18670 break;
18671 save_reg_mask |= 1 << reg;
18675 return save_reg_mask;
18678 /* Return true if r3 is live at the start of the function. */
18680 static bool
18681 arm_r3_live_at_start_p (void)
18683 /* Just look at cfg info, which is still close enough to correct at this
18684 point. This gives false positives for broken functions that might use
18685 uninitialized data that happens to be allocated in r3, but who cares? */
18686 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18689 /* Compute the number of bytes used to store the static chain register on the
18690 stack, above the stack frame. We need to know this accurately to get the
18691 alignment of the rest of the stack frame correct. */
18693 static int
18694 arm_compute_static_chain_stack_bytes (void)
18696 /* See the defining assertion in arm_expand_prologue. */
18697 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18698 && IS_NESTED (arm_current_func_type ())
18699 && arm_r3_live_at_start_p ()
18700 && crtl->args.pretend_args_size == 0)
18701 return 4;
18703 return 0;
18706 /* Compute a bit mask of which registers need to be
18707 saved on the stack for the current function.
18708 This is used by arm_get_frame_offsets, which may add extra registers. */
18710 static unsigned long
18711 arm_compute_save_reg_mask (void)
18713 unsigned int save_reg_mask = 0;
18714 unsigned long func_type = arm_current_func_type ();
18715 unsigned int reg;
18717 if (IS_NAKED (func_type))
18718 /* This should never really happen. */
18719 return 0;
18721 /* If we are creating a stack frame, then we must save the frame pointer,
18722 IP (which will hold the old stack pointer), LR and the PC. */
18723 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18724 save_reg_mask |=
18725 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18726 | (1 << IP_REGNUM)
18727 | (1 << LR_REGNUM)
18728 | (1 << PC_REGNUM);
18730 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18732 /* Decide if we need to save the link register.
18733 Interrupt routines have their own banked link register,
18734 so they never need to save it.
18735 Otherwise if we do not use the link register we do not need to save
18736 it. If we are pushing other registers onto the stack however, we
18737 can save an instruction in the epilogue by pushing the link register
18738 now and then popping it back into the PC. This incurs extra memory
18739 accesses though, so we only do it when optimizing for size, and only
18740 if we know that we will not need a fancy return sequence. */
18741 if (df_regs_ever_live_p (LR_REGNUM)
18742 || (save_reg_mask
18743 && optimize_size
18744 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
18745 && !crtl->calls_eh_return))
18746 save_reg_mask |= 1 << LR_REGNUM;
18748 if (cfun->machine->lr_save_eliminated)
18749 save_reg_mask &= ~ (1 << LR_REGNUM);
18751 if (TARGET_REALLY_IWMMXT
18752 && ((bit_count (save_reg_mask)
18753 + ARM_NUM_INTS (crtl->args.pretend_args_size +
18754 arm_compute_static_chain_stack_bytes())
18755 ) % 2) != 0)
18757 /* The total number of registers that are going to be pushed
18758 onto the stack is odd. We need to ensure that the stack
18759 is 64-bit aligned before we start to save iWMMXt registers,
18760 and also before we start to create locals. (A local variable
18761 might be a double or long long which we will load/store using
18762 an iWMMXt instruction). Therefore we need to push another
18763 ARM register, so that the stack will be 64-bit aligned. We
18764 try to avoid using the arg registers (r0 - r3) as they might be
18765 used to pass values in a tail call. */
18766 for (reg = 4; reg <= 12; reg++)
18767 if ((save_reg_mask & (1 << reg)) == 0)
18768 break;
18770 if (reg <= 12)
18771 save_reg_mask |= (1 << reg);
18772 else
18774 cfun->machine->sibcall_blocked = 1;
18775 save_reg_mask |= (1 << 3);
18779 /* We may need to push an additional register for use initializing the
18780 PIC base register. */
18781 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
18782 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
18784 reg = thumb_find_work_register (1 << 4);
18785 if (!call_used_regs[reg])
18786 save_reg_mask |= (1 << reg);
18789 return save_reg_mask;
18793 /* Compute a bit mask of which registers need to be
18794 saved on the stack for the current function. */
18795 static unsigned long
18796 thumb1_compute_save_reg_mask (void)
18798 unsigned long mask;
18799 unsigned reg;
18801 mask = 0;
18802 for (reg = 0; reg < 12; reg ++)
18803 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
18804 mask |= 1 << reg;
18806 if (flag_pic
18807 && !TARGET_SINGLE_PIC_BASE
18808 && arm_pic_register != INVALID_REGNUM
18809 && crtl->uses_pic_offset_table)
18810 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18812 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18813 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18814 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18816 /* LR will also be pushed if any lo regs are pushed. */
18817 if (mask & 0xff || thumb_force_lr_save ())
18818 mask |= (1 << LR_REGNUM);
18820 /* Make sure we have a low work register if we need one.
18821 We will need one if we are going to push a high register,
18822 but we are not currently intending to push a low register. */
18823 if ((mask & 0xff) == 0
18824 && ((mask & 0x0f00) || TARGET_BACKTRACE))
18826 /* Use thumb_find_work_register to choose which register
18827 we will use. If the register is live then we will
18828 have to push it. Use LAST_LO_REGNUM as our fallback
18829 choice for the register to select. */
18830 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
18831 /* Make sure the register returned by thumb_find_work_register is
18832 not part of the return value. */
18833 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
18834 reg = LAST_LO_REGNUM;
18836 if (! call_used_regs[reg])
18837 mask |= 1 << reg;
18840 /* The 504 below is 8 bytes less than 512 because there are two possible
18841 alignment words. We can't tell here if they will be present or not so we
18842 have to play it safe and assume that they are. */
18843 if ((CALLER_INTERWORKING_SLOT_SIZE +
18844 ROUND_UP_WORD (get_frame_size ()) +
18845 crtl->outgoing_args_size) >= 504)
18847 /* This is the same as the code in thumb1_expand_prologue() which
18848 determines which register to use for stack decrement. */
18849 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
18850 if (mask & (1 << reg))
18851 break;
18853 if (reg > LAST_LO_REGNUM)
18855 /* Make sure we have a register available for stack decrement. */
18856 mask |= 1 << LAST_LO_REGNUM;
18860 return mask;
18864 /* Return the number of bytes required to save VFP registers. */
18865 static int
18866 arm_get_vfp_saved_size (void)
18868 unsigned int regno;
18869 int count;
18870 int saved;
18872 saved = 0;
18873 /* Space for saved VFP registers. */
18874 if (TARGET_HARD_FLOAT && TARGET_VFP)
18876 count = 0;
18877 for (regno = FIRST_VFP_REGNUM;
18878 regno < LAST_VFP_REGNUM;
18879 regno += 2)
18881 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
18882 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
18884 if (count > 0)
18886 /* Workaround ARM10 VFPr1 bug. */
18887 if (count == 2 && !arm_arch6)
18888 count++;
18889 saved += count * 8;
18891 count = 0;
18893 else
18894 count++;
18896 if (count > 0)
18898 if (count == 2 && !arm_arch6)
18899 count++;
18900 saved += count * 8;
18903 return saved;
18907 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18908 everything bar the final return instruction. If simple_return is true,
18909 then do not output epilogue, because it has already been emitted in RTL. */
18910 const char *
18911 output_return_instruction (rtx operand, bool really_return, bool reverse,
18912 bool simple_return)
18914 char conditional[10];
18915 char instr[100];
18916 unsigned reg;
18917 unsigned long live_regs_mask;
18918 unsigned long func_type;
18919 arm_stack_offsets *offsets;
18921 func_type = arm_current_func_type ();
18923 if (IS_NAKED (func_type))
18924 return "";
18926 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
18928 /* If this function was declared non-returning, and we have
18929 found a tail call, then we have to trust that the called
18930 function won't return. */
18931 if (really_return)
18933 rtx ops[2];
18935 /* Otherwise, trap an attempted return by aborting. */
18936 ops[0] = operand;
18937 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
18938 : "abort");
18939 assemble_external_libcall (ops[1]);
18940 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
18943 return "";
18946 gcc_assert (!cfun->calls_alloca || really_return);
18948 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
18950 cfun->machine->return_used_this_function = 1;
18952 offsets = arm_get_frame_offsets ();
18953 live_regs_mask = offsets->saved_regs_mask;
18955 if (!simple_return && live_regs_mask)
18957 const char * return_reg;
18959 /* If we do not have any special requirements for function exit
18960 (e.g. interworking) then we can load the return address
18961 directly into the PC. Otherwise we must load it into LR. */
18962 if (really_return
18963 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
18964 return_reg = reg_names[PC_REGNUM];
18965 else
18966 return_reg = reg_names[LR_REGNUM];
18968 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
18970 /* There are three possible reasons for the IP register
18971 being saved. 1) a stack frame was created, in which case
18972 IP contains the old stack pointer, or 2) an ISR routine
18973 corrupted it, or 3) it was saved to align the stack on
18974 iWMMXt. In case 1, restore IP into SP, otherwise just
18975 restore IP. */
18976 if (frame_pointer_needed)
18978 live_regs_mask &= ~ (1 << IP_REGNUM);
18979 live_regs_mask |= (1 << SP_REGNUM);
18981 else
18982 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
18985 /* On some ARM architectures it is faster to use LDR rather than
18986 LDM to load a single register. On other architectures, the
18987 cost is the same. In 26 bit mode, or for exception handlers,
18988 we have to use LDM to load the PC so that the CPSR is also
18989 restored. */
18990 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
18991 if (live_regs_mask == (1U << reg))
18992 break;
18994 if (reg <= LAST_ARM_REGNUM
18995 && (reg != LR_REGNUM
18996 || ! really_return
18997 || ! IS_INTERRUPT (func_type)))
18999 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19000 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19002 else
19004 char *p;
19005 int first = 1;
19007 /* Generate the load multiple instruction to restore the
19008 registers. Note we can get here, even if
19009 frame_pointer_needed is true, but only if sp already
19010 points to the base of the saved core registers. */
19011 if (live_regs_mask & (1 << SP_REGNUM))
19013 unsigned HOST_WIDE_INT stack_adjust;
19015 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19016 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19018 if (stack_adjust && arm_arch5 && TARGET_ARM)
19019 if (TARGET_UNIFIED_ASM)
19020 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19021 else
19022 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19023 else
19025 /* If we can't use ldmib (SA110 bug),
19026 then try to pop r3 instead. */
19027 if (stack_adjust)
19028 live_regs_mask |= 1 << 3;
19030 if (TARGET_UNIFIED_ASM)
19031 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19032 else
19033 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19036 else
19037 if (TARGET_UNIFIED_ASM)
19038 sprintf (instr, "pop%s\t{", conditional);
19039 else
19040 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19042 p = instr + strlen (instr);
19044 for (reg = 0; reg <= SP_REGNUM; reg++)
19045 if (live_regs_mask & (1 << reg))
19047 int l = strlen (reg_names[reg]);
19049 if (first)
19050 first = 0;
19051 else
19053 memcpy (p, ", ", 2);
19054 p += 2;
19057 memcpy (p, "%|", 2);
19058 memcpy (p + 2, reg_names[reg], l);
19059 p += l + 2;
19062 if (live_regs_mask & (1 << LR_REGNUM))
19064 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19065 /* If returning from an interrupt, restore the CPSR. */
19066 if (IS_INTERRUPT (func_type))
19067 strcat (p, "^");
19069 else
19070 strcpy (p, "}");
19073 output_asm_insn (instr, & operand);
19075 /* See if we need to generate an extra instruction to
19076 perform the actual function return. */
19077 if (really_return
19078 && func_type != ARM_FT_INTERWORKED
19079 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19081 /* The return has already been handled
19082 by loading the LR into the PC. */
19083 return "";
19087 if (really_return)
19089 switch ((int) ARM_FUNC_TYPE (func_type))
19091 case ARM_FT_ISR:
19092 case ARM_FT_FIQ:
19093 /* ??? This is wrong for unified assembly syntax. */
19094 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19095 break;
19097 case ARM_FT_INTERWORKED:
19098 sprintf (instr, "bx%s\t%%|lr", conditional);
19099 break;
19101 case ARM_FT_EXCEPTION:
19102 /* ??? This is wrong for unified assembly syntax. */
19103 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19104 break;
19106 default:
19107 /* Use bx if it's available. */
19108 if (arm_arch5 || arm_arch4t)
19109 sprintf (instr, "bx%s\t%%|lr", conditional);
19110 else
19111 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19112 break;
19115 output_asm_insn (instr, & operand);
19118 return "";
19121 /* Write the function name into the code section, directly preceding
19122 the function prologue.
19124 Code will be output similar to this:
19126 .ascii "arm_poke_function_name", 0
19127 .align
19129 .word 0xff000000 + (t1 - t0)
19130 arm_poke_function_name
19131 mov ip, sp
19132 stmfd sp!, {fp, ip, lr, pc}
19133 sub fp, ip, #4
19135 When performing a stack backtrace, code can inspect the value
19136 of 'pc' stored at 'fp' + 0. If the trace function then looks
19137 at location pc - 12 and the top 8 bits are set, then we know
19138 that there is a function name embedded immediately preceding this
19139 location, whose length is ((pc[-3]) & ~0xff000000).
19141 We assume that pc is declared as a pointer to an unsigned long.
19143 It is of no benefit to output the function name if we are assembling
19144 a leaf function. These function types will not contain a stack
19145 backtrace structure, therefore it is not possible to determine the
19146 function name. */
19147 void
19148 arm_poke_function_name (FILE *stream, const char *name)
19150 unsigned long alignlength;
19151 unsigned long length;
19152 rtx x;
19154 length = strlen (name) + 1;
19155 alignlength = ROUND_UP_WORD (length);
19157 ASM_OUTPUT_ASCII (stream, name, length);
19158 ASM_OUTPUT_ALIGN (stream, 2);
19159 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19160 assemble_aligned_integer (UNITS_PER_WORD, x);
19163 /* Place some comments into the assembler stream
19164 describing the current function. */
19165 static void
19166 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19168 unsigned long func_type;
19170 /* ??? Do we want to print some of the below anyway? */
19171 if (TARGET_THUMB1)
19172 return;
19174 /* Sanity check. */
19175 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19177 func_type = arm_current_func_type ();
19179 switch ((int) ARM_FUNC_TYPE (func_type))
19181 default:
19182 case ARM_FT_NORMAL:
19183 break;
19184 case ARM_FT_INTERWORKED:
19185 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19186 break;
19187 case ARM_FT_ISR:
19188 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19189 break;
19190 case ARM_FT_FIQ:
19191 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19192 break;
19193 case ARM_FT_EXCEPTION:
19194 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19195 break;
19198 if (IS_NAKED (func_type))
19199 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19201 if (IS_VOLATILE (func_type))
19202 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19204 if (IS_NESTED (func_type))
19205 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19206 if (IS_STACKALIGN (func_type))
19207 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19209 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19210 crtl->args.size,
19211 crtl->args.pretend_args_size, frame_size);
19213 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19214 frame_pointer_needed,
19215 cfun->machine->uses_anonymous_args);
19217 if (cfun->machine->lr_save_eliminated)
19218 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19220 if (crtl->calls_eh_return)
19221 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19225 static void
19226 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19227 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19229 arm_stack_offsets *offsets;
19231 if (TARGET_THUMB1)
19233 int regno;
19235 /* Emit any call-via-reg trampolines that are needed for v4t support
19236 of call_reg and call_value_reg type insns. */
19237 for (regno = 0; regno < LR_REGNUM; regno++)
19239 rtx label = cfun->machine->call_via[regno];
19241 if (label != NULL)
19243 switch_to_section (function_section (current_function_decl));
19244 targetm.asm_out.internal_label (asm_out_file, "L",
19245 CODE_LABEL_NUMBER (label));
19246 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19250 /* ??? Probably not safe to set this here, since it assumes that a
19251 function will be emitted as assembly immediately after we generate
19252 RTL for it. This does not happen for inline functions. */
19253 cfun->machine->return_used_this_function = 0;
19255 else /* TARGET_32BIT */
19257 /* We need to take into account any stack-frame rounding. */
19258 offsets = arm_get_frame_offsets ();
19260 gcc_assert (!use_return_insn (FALSE, NULL)
19261 || (cfun->machine->return_used_this_function != 0)
19262 || offsets->saved_regs == offsets->outgoing_args
19263 || frame_pointer_needed);
19265 /* Reset the ARM-specific per-function variables. */
19266 after_arm_reorg = 0;
19270 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19271 STR and STRD. If an even number of registers are being pushed, one
19272 or more STRD patterns are created for each register pair. If an
19273 odd number of registers are pushed, emit an initial STR followed by
19274 as many STRD instructions as are needed. This works best when the
19275 stack is initially 64-bit aligned (the normal case), since it
19276 ensures that each STRD is also 64-bit aligned. */
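/* Illustrative example (editorial, assuming r4-r6 need saving): the code
   below would emit roughly
	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]
   i.e. the odd register is pushed first with writeback, so that the
   following STRD lands on a doubleword-aligned address.  */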
19277 static void
19278 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19280 int num_regs = 0;
19281 int i;
19282 int regno;
19283 rtx par = NULL_RTX;
19284 rtx dwarf = NULL_RTX;
19285 rtx tmp;
19286 bool first = true;
19288 num_regs = bit_count (saved_regs_mask);
19290 /* Must be at least one register to save, and can't save SP or PC. */
19291 gcc_assert (num_regs > 0 && num_regs <= 14);
19292 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19293 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19295 /* Create sequence for DWARF info. All the frame-related data for
19296 debugging is held in this wrapper. */
19297 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19299 /* Describe the stack adjustment. */
19300 tmp = gen_rtx_SET (VOIDmode,
19301 stack_pointer_rtx,
19302 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19303 RTX_FRAME_RELATED_P (tmp) = 1;
19304 XVECEXP (dwarf, 0, 0) = tmp;
19306 /* Find the first register. */
19307 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19310 i = 0;
19312 /* If there's an odd number of registers to push, start off by
19313 pushing a single register. This ensures that subsequent strd
19314 operations are dword aligned (assuming that SP was originally
19315 64-bit aligned). */
19316 if ((num_regs & 1) != 0)
19318 rtx reg, mem, insn;
19320 reg = gen_rtx_REG (SImode, regno);
19321 if (num_regs == 1)
19322 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19323 stack_pointer_rtx));
19324 else
19325 mem = gen_frame_mem (Pmode,
19326 gen_rtx_PRE_MODIFY
19327 (Pmode, stack_pointer_rtx,
19328 plus_constant (Pmode, stack_pointer_rtx,
19329 -4 * num_regs)));
19331 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19332 RTX_FRAME_RELATED_P (tmp) = 1;
19333 insn = emit_insn (tmp);
19334 RTX_FRAME_RELATED_P (insn) = 1;
19335 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19336 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19337 reg);
19338 RTX_FRAME_RELATED_P (tmp) = 1;
19339 i++;
19340 regno++;
19341 XVECEXP (dwarf, 0, i) = tmp;
19342 first = false;
19345 while (i < num_regs)
19346 if (saved_regs_mask & (1 << regno))
19348 rtx reg1, reg2, mem1, mem2;
19349 rtx tmp0, tmp1, tmp2;
19350 int regno2;
19352 /* Find the register to pair with this one. */
19353 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19354 regno2++)
19357 reg1 = gen_rtx_REG (SImode, regno);
19358 reg2 = gen_rtx_REG (SImode, regno2);
19360 if (first)
19362 rtx insn;
19364 first = false;
19365 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19366 stack_pointer_rtx,
19367 -4 * num_regs));
19368 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19369 stack_pointer_rtx,
19370 -4 * (num_regs - 1)));
19371 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19372 plus_constant (Pmode, stack_pointer_rtx,
19373 -4 * (num_regs)));
19374 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19375 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19376 RTX_FRAME_RELATED_P (tmp0) = 1;
19377 RTX_FRAME_RELATED_P (tmp1) = 1;
19378 RTX_FRAME_RELATED_P (tmp2) = 1;
19379 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19380 XVECEXP (par, 0, 0) = tmp0;
19381 XVECEXP (par, 0, 1) = tmp1;
19382 XVECEXP (par, 0, 2) = tmp2;
19383 insn = emit_insn (par);
19384 RTX_FRAME_RELATED_P (insn) = 1;
19385 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19387 else
19389 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19390 stack_pointer_rtx,
19391 4 * i));
19392 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19393 stack_pointer_rtx,
19394 4 * (i + 1)));
19395 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19396 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19397 RTX_FRAME_RELATED_P (tmp1) = 1;
19398 RTX_FRAME_RELATED_P (tmp2) = 1;
19399 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19400 XVECEXP (par, 0, 0) = tmp1;
19401 XVECEXP (par, 0, 1) = tmp2;
19402 emit_insn (par);
19405 /* Create unwind information. This is an approximation. */
19406 tmp1 = gen_rtx_SET (VOIDmode,
19407 gen_frame_mem (Pmode,
19408 plus_constant (Pmode,
19409 stack_pointer_rtx,
19410 4 * i)),
19411 reg1);
19412 tmp2 = gen_rtx_SET (VOIDmode,
19413 gen_frame_mem (Pmode,
19414 plus_constant (Pmode,
19415 stack_pointer_rtx,
19416 4 * (i + 1))),
19417 reg2);
19419 RTX_FRAME_RELATED_P (tmp1) = 1;
19420 RTX_FRAME_RELATED_P (tmp2) = 1;
19421 XVECEXP (dwarf, 0, i + 1) = tmp1;
19422 XVECEXP (dwarf, 0, i + 2) = tmp2;
19423 i += 2;
19424 regno = regno2 + 1;
19426 else
19427 regno++;
19429 return;
19432 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19433 whenever possible, otherwise it emits single-word stores. The first store
19434 also allocates stack space for all saved registers, using writeback with
19435 post-addressing mode. All other stores use offset addressing. If no STRD
19436 can be emitted, this function emits a sequence of single-word stores,
19437 and not an STM as before, because single-word stores provide more freedom
19438 for scheduling and can be turned into an STM by peephole optimizations. */
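/* Illustrative example (editorial, assuming r4-r7 need saving): the code
   below would emit roughly
	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]
   with the first store allocating all the stack space via writeback and the
   remaining stores using plain offset addressing.  */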
19439 static void
19440 arm_emit_strd_push (unsigned long saved_regs_mask)
19442 int num_regs = 0;
19443 int i, j, dwarf_index = 0;
19444 int offset = 0;
19445 rtx dwarf = NULL_RTX;
19446 rtx insn = NULL_RTX;
19447 rtx tmp, mem;
19449 /* TODO: More efficient code can be emitted by changing the
19450 layout, e.g., first push all pairs that can use STRD to keep the
19451 stack aligned, and then push all other registers. */
19452 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19453 if (saved_regs_mask & (1 << i))
19454 num_regs++;
19456 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19457 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19458 gcc_assert (num_regs > 0);
19460 /* Create sequence for DWARF info. */
19461 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19463 /* For dwarf info, we generate explicit stack update. */
19464 tmp = gen_rtx_SET (VOIDmode,
19465 stack_pointer_rtx,
19466 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19467 RTX_FRAME_RELATED_P (tmp) = 1;
19468 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19470 /* Save registers. */
19471 offset = - 4 * num_regs;
19472 j = 0;
19473 while (j <= LAST_ARM_REGNUM)
19474 if (saved_regs_mask & (1 << j))
19476 if ((j % 2 == 0)
19477 && (saved_regs_mask & (1 << (j + 1))))
19479 /* Current register and previous register form register pair for
19480 which STRD can be generated. */
19481 if (offset < 0)
19483 /* Allocate stack space for all saved registers. */
19484 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19485 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19486 mem = gen_frame_mem (DImode, tmp);
19487 offset = 0;
19489 else if (offset > 0)
19490 mem = gen_frame_mem (DImode,
19491 plus_constant (Pmode,
19492 stack_pointer_rtx,
19493 offset));
19494 else
19495 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19497 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19498 RTX_FRAME_RELATED_P (tmp) = 1;
19499 tmp = emit_insn (tmp);
19501 /* Record the first store insn. */
19502 if (dwarf_index == 1)
19503 insn = tmp;
19505 /* Generate dwarf info. */
19506 mem = gen_frame_mem (SImode,
19507 plus_constant (Pmode,
19508 stack_pointer_rtx,
19509 offset));
19510 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19511 RTX_FRAME_RELATED_P (tmp) = 1;
19512 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19514 mem = gen_frame_mem (SImode,
19515 plus_constant (Pmode,
19516 stack_pointer_rtx,
19517 offset + 4));
19518 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19519 RTX_FRAME_RELATED_P (tmp) = 1;
19520 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19522 offset += 8;
19523 j += 2;
19525 else
19527 /* Emit a single word store. */
19528 if (offset < 0)
19530 /* Allocate stack space for all saved registers. */
19531 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19532 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19533 mem = gen_frame_mem (SImode, tmp);
19534 offset = 0;
19536 else if (offset > 0)
19537 mem = gen_frame_mem (SImode,
19538 plus_constant (Pmode,
19539 stack_pointer_rtx,
19540 offset));
19541 else
19542 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19544 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19545 RTX_FRAME_RELATED_P (tmp) = 1;
19546 tmp = emit_insn (tmp);
19548 /* Record the first store insn. */
19549 if (dwarf_index == 1)
19550 insn = tmp;
19552 /* Generate dwarf info. */
19553 mem = gen_frame_mem (SImode,
19554 plus_constant(Pmode,
19555 stack_pointer_rtx,
19556 offset));
19557 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19558 RTX_FRAME_RELATED_P (tmp) = 1;
19559 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19561 offset += 4;
19562 j += 1;
19565 else
19566 j++;
19568 /* Attach dwarf info to the first insn we generate. */
19569 gcc_assert (insn != NULL_RTX);
19570 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19571 RTX_FRAME_RELATED_P (insn) = 1;
19574 /* Generate and emit an insn that we will recognize as a push_multi.
19575 Unfortunately, since this insn does not reflect very well the actual
19576 semantics of the operation, we need to annotate the insn for the benefit
19577 of DWARF2 frame unwind information. */
19578 static rtx
19579 emit_multi_reg_push (unsigned long mask)
19581 int num_regs = 0;
19582 int num_dwarf_regs;
19583 int i, j;
19584 rtx par;
19585 rtx dwarf;
19586 int dwarf_par_index;
19587 rtx tmp, reg;
19589 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19590 if (mask & (1 << i))
19591 num_regs++;
19593 gcc_assert (num_regs && num_regs <= 16);
19595 /* We don't record the PC in the dwarf frame information. */
19596 num_dwarf_regs = num_regs;
19597 if (mask & (1 << PC_REGNUM))
19598 num_dwarf_regs--;
19600 /* For the body of the insn we are going to generate an UNSPEC in
19601 parallel with several USEs. This allows the insn to be recognized
19602 by the push_multi pattern in the arm.md file.
19604 The body of the insn looks something like this:
19606 (parallel [
19607 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19608 (const_int:SI <num>)))
19609 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19610 (use (reg:SI XX))
19611 (use (reg:SI YY))
19615 For the frame note however, we try to be more explicit and actually
19616 show each register being stored into the stack frame, plus a (single)
19617 decrement of the stack pointer. We do it this way in order to be
19618 friendly to the stack unwinding code, which only wants to see a single
19619 stack decrement per instruction. The RTL we generate for the note looks
19620 something like this:
19622 (sequence [
19623 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19624 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19625 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19626 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19630 FIXME: In an ideal world the PRE_MODIFY would not exist and
19631 instead we'd have a parallel expression detailing all
19632 the stores to the various memory addresses so that debug
19633 information is more up-to-date. Remember however while writing
19634 this to take care of the constraints with the push instruction.
19636 Note also that this has to be taken care of for the VFP registers.
19638 For more see PR43399. */
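/* For illustration only: with MASK = {r4, r5, lr} the insn built below is
   matched by push_multi and would typically assemble to something like

       push    {r4, r5, lr}           @ or: stmfd sp!, {r4, r5, lr}

   while the attached frame note describes an SP decrement of 12 followed
   by the three individual word stores, as outlined above.  */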
19640 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19641 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19642 dwarf_par_index = 1;
19644 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19646 if (mask & (1 << i))
19648 reg = gen_rtx_REG (SImode, i);
19650 XVECEXP (par, 0, 0)
19651 = gen_rtx_SET (VOIDmode,
19652 gen_frame_mem
19653 (BLKmode,
19654 gen_rtx_PRE_MODIFY (Pmode,
19655 stack_pointer_rtx,
19656 plus_constant
19657 (Pmode, stack_pointer_rtx,
19658 -4 * num_regs))
19660 gen_rtx_UNSPEC (BLKmode,
19661 gen_rtvec (1, reg),
19662 UNSPEC_PUSH_MULT));
19664 if (i != PC_REGNUM)
19666 tmp = gen_rtx_SET (VOIDmode,
19667 gen_frame_mem (SImode, stack_pointer_rtx),
19668 reg);
19669 RTX_FRAME_RELATED_P (tmp) = 1;
19670 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
19671 dwarf_par_index++;
19674 break;
19678 for (j = 1, i++; j < num_regs; i++)
19680 if (mask & (1 << i))
19682 reg = gen_rtx_REG (SImode, i);
19684 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19686 if (i != PC_REGNUM)
19689 = gen_rtx_SET (VOIDmode,
19690 gen_frame_mem
19691 (SImode,
19692 plus_constant (Pmode, stack_pointer_rtx,
19693 4 * j)),
19694 reg);
19695 RTX_FRAME_RELATED_P (tmp) = 1;
19696 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19699 j++;
19703 par = emit_insn (par);
19705 tmp = gen_rtx_SET (VOIDmode,
19706 stack_pointer_rtx,
19707 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19708 RTX_FRAME_RELATED_P (tmp) = 1;
19709 XVECEXP (dwarf, 0, 0) = tmp;
19711 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19713 return par;
19716 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19717 SIZE is the offset to be adjusted.
19718 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19719 static void
19720 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19722 rtx dwarf;
19724 RTX_FRAME_RELATED_P (insn) = 1;
19725 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19726 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19729 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19730 SAVED_REGS_MASK shows which registers need to be restored.
19732 Unfortunately, since this insn does not reflect very well the actual
19733 semantics of the operation, we need to annotate the insn for the benefit
19734 of DWARF2 frame unwind information. */
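/* For illustration only: with SAVED_REGS_MASK = {r4, r5, pc} the insn
   emitted here would typically assemble to something like

       pop     {r4, r5, pc}           @ or: ldmfd sp!, {r4, r5, pc}

   carrying one REG_CFA_RESTORE note per restored register (PC excluded)
   and, when not returning via PC, a REG_CFA_ADJUST_CFA note as well.  */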
19735 static void
19736 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
19738 int num_regs = 0;
19739 int i, j;
19740 rtx par;
19741 rtx dwarf = NULL_RTX;
19742 rtx tmp, reg;
19743 bool return_in_pc;
19744 int offset_adj;
19745 int emit_update;
19747 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19748 offset_adj = return_in_pc ? 1 : 0;
19749 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19750 if (saved_regs_mask & (1 << i))
19751 num_regs++;
19753 gcc_assert (num_regs && num_regs <= 16);
19755 /* If SP is in reglist, then we don't emit SP update insn. */
19756 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
19758 /* The parallel needs to hold num_regs SETs
19759 and one SET for the stack update. */
19760 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
19762 if (return_in_pc)
19764 tmp = ret_rtx;
19765 XVECEXP (par, 0, 0) = tmp;
19768 if (emit_update)
19770 /* Increment the stack pointer, based on there being
19771 num_regs 4-byte registers to restore. */
19772 tmp = gen_rtx_SET (VOIDmode,
19773 stack_pointer_rtx,
19774 plus_constant (Pmode,
19775 stack_pointer_rtx,
19776 4 * num_regs));
19777 RTX_FRAME_RELATED_P (tmp) = 1;
19778 XVECEXP (par, 0, offset_adj) = tmp;
19781 /* Now restore every reg, which may include PC. */
19782 for (j = 0, i = 0; j < num_regs; i++)
19783 if (saved_regs_mask & (1 << i))
19785 reg = gen_rtx_REG (SImode, i);
19786 if ((num_regs == 1) && emit_update && !return_in_pc)
19788 /* Emit single load with writeback. */
19789 tmp = gen_frame_mem (SImode,
19790 gen_rtx_POST_INC (Pmode,
19791 stack_pointer_rtx));
19792 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
19793 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19794 return;
19797 tmp = gen_rtx_SET (VOIDmode,
19798 reg,
19799 gen_frame_mem
19800 (SImode,
19801 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
19802 RTX_FRAME_RELATED_P (tmp) = 1;
19803 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
19805 /* We need to maintain a sequence for DWARF info too. As dwarf info
19806 should not have PC, skip PC. */
19807 if (i != PC_REGNUM)
19808 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19810 j++;
19813 if (return_in_pc)
19814 par = emit_jump_insn (par);
19815 else
19816 par = emit_insn (par);
19818 REG_NOTES (par) = dwarf;
19819 if (!return_in_pc)
19820 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
19821 stack_pointer_rtx, stack_pointer_rtx);
19824 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19825 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19827 Unfortunately, since this insn does not reflect very well the actual
19828 semantics of the operation, we need to annotate the insn for the benefit
19829 of DWARF2 frame unwind information. */
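/* For illustration only: popping the four consecutive D-registers d8-d11
   with BASE_REG = sp would typically assemble to something like

       vldmia  sp!, {d8-d11}

   i.e. a single load-multiple that also advances the base register by
   8 * NUM_REGS bytes.  BASE_REG need not be the stack pointer.  */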
19830 static void
19831 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
19833 int i, j;
19834 rtx par;
19835 rtx dwarf = NULL_RTX;
19836 rtx tmp, reg;
19838 gcc_assert (num_regs && num_regs <= 32);
19840 /* Workaround ARM10 VFPr1 bug. */
19841 if (num_regs == 2 && !arm_arch6)
19843 if (first_reg == 15)
19844 first_reg--;
19846 num_regs++;
19849 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19850 there could be up to 32 D-registers to restore.
19851 If there are more than 16 D-registers, make two recursive calls,
19852 each of which emits one pop_multi instruction. */
19853 if (num_regs > 16)
19855 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
19856 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
19857 return;
19860 /* The parallel needs to hold num_regs SETs
19861 and one SET for the stack update. */
19862 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
19864 /* Increment the stack pointer, based on there being
19865 num_regs 8-byte registers to restore. */
19866 tmp = gen_rtx_SET (VOIDmode,
19867 base_reg,
19868 plus_constant (Pmode, base_reg, 8 * num_regs));
19869 RTX_FRAME_RELATED_P (tmp) = 1;
19870 XVECEXP (par, 0, 0) = tmp;
19872 /* Now show every reg that will be restored, using a SET for each. */
19873 for (j = 0, i=first_reg; j < num_regs; i += 2)
19875 reg = gen_rtx_REG (DFmode, i);
19877 tmp = gen_rtx_SET (VOIDmode,
19878 reg,
19879 gen_frame_mem
19880 (DFmode,
19881 plus_constant (Pmode, base_reg, 8 * j)));
19882 RTX_FRAME_RELATED_P (tmp) = 1;
19883 XVECEXP (par, 0, j + 1) = tmp;
19885 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19887 j++;
19890 par = emit_insn (par);
19891 REG_NOTES (par) = dwarf;
19893 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
19894 base_reg, base_reg);
19897 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
19898 even number of registers is being popped, multiple LDRD patterns are created for
19899 all register pairs. If an odd number of registers is popped, the last register
19900 is loaded using an LDR pattern. */
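/* For illustration only: assuming SAVED_REGS_MASK = {r4, r5, r6, r7, lr},
   the code below would emit roughly

       ldrd    r4, r5, [sp]
       ldrd    r6, r7, [sp, #8]
       add     sp, sp, #16
       ldr     lr, [sp], #4

   Thumb-2 LDRD does not require the two destination registers of a pair
   to be consecutive.  */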
19901 static void
19902 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
19904 int num_regs = 0;
19905 int i, j;
19906 rtx par = NULL_RTX;
19907 rtx dwarf = NULL_RTX;
19908 rtx tmp, reg, tmp1;
19909 bool return_in_pc;
19911 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19912 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19913 if (saved_regs_mask & (1 << i))
19914 num_regs++;
19916 gcc_assert (num_regs && num_regs <= 16);
19918 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
19919 to be popped. So, if num_regs is even, now it will become odd,
19920 and we can generate pop with PC. If num_regs is odd, it will be
19921 even now, and ldr with return can be generated for PC. */
19922 if (return_in_pc)
19923 num_regs--;
19925 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19927 /* Var j iterates over all the registers to gather all the registers in
19928 saved_regs_mask. Var i gives index of saved registers in stack frame.
19929 A PARALLEL RTX of register-pair is created here, so that pattern for
19930 LDRD can be matched. As PC is always last register to be popped, and
19931 we have already decremented num_regs if PC, we don't have to worry
19932 about PC in this loop. */
19933 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
19934 if (saved_regs_mask & (1 << j))
19936 /* Create RTX for memory load. */
19937 reg = gen_rtx_REG (SImode, j);
19938 tmp = gen_rtx_SET (SImode,
19939 reg,
19940 gen_frame_mem (SImode,
19941 plus_constant (Pmode,
19942 stack_pointer_rtx, 4 * i)));
19943 RTX_FRAME_RELATED_P (tmp) = 1;
19945 if (i % 2 == 0)
19947 /* When saved-register index (i) is even, the RTX to be emitted is
19948 yet to be created. Hence create it first. The LDRD pattern we
19949 are generating is :
19950 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19951 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19952 where target registers need not be consecutive. */
19953 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19954 dwarf = NULL_RTX;
19957 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
19958 added as 0th element and if i is odd, reg_i is added as 1st element
19959 of LDRD pattern shown above. */
19960 XVECEXP (par, 0, (i % 2)) = tmp;
19961 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19963 if ((i % 2) == 1)
19965 /* When saved-register index (i) is odd, RTXs for both the registers
19966 to be loaded are generated in above given LDRD pattern, and the
19967 pattern can be emitted now. */
19968 par = emit_insn (par);
19969 REG_NOTES (par) = dwarf;
19970 RTX_FRAME_RELATED_P (par) = 1;
19973 i++;
19976 /* If the number of registers pushed is odd AND return_in_pc is false, OR the
19977 number of registers is even AND return_in_pc is true, the last register is
19978 popped using LDR. It can be PC as well. Hence, adjust the stack first and
19979 then use LDR with post-increment. */
19981 /* Increment the stack pointer, based on there being
19982 num_regs 4-byte registers to restore. */
19983 tmp = gen_rtx_SET (VOIDmode,
19984 stack_pointer_rtx,
19985 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
19986 RTX_FRAME_RELATED_P (tmp) = 1;
19987 tmp = emit_insn (tmp);
19988 if (!return_in_pc)
19990 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
19991 stack_pointer_rtx, stack_pointer_rtx);
19994 dwarf = NULL_RTX;
19996 if (((num_regs % 2) == 1 && !return_in_pc)
19997 || ((num_regs % 2) == 0 && return_in_pc))
19999 /* Scan for the single register to be popped. Skip until the saved
20000 register is found. */
20001 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20003 /* Gen LDR with post increment here. */
20004 tmp1 = gen_rtx_MEM (SImode,
20005 gen_rtx_POST_INC (SImode,
20006 stack_pointer_rtx));
20007 set_mem_alias_set (tmp1, get_frame_alias_set ());
20009 reg = gen_rtx_REG (SImode, j);
20010 tmp = gen_rtx_SET (SImode, reg, tmp1);
20011 RTX_FRAME_RELATED_P (tmp) = 1;
20012 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20014 if (return_in_pc)
20016 /* If return_in_pc, j must be PC_REGNUM. */
20017 gcc_assert (j == PC_REGNUM);
20018 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20019 XVECEXP (par, 0, 0) = ret_rtx;
20020 XVECEXP (par, 0, 1) = tmp;
20021 par = emit_jump_insn (par);
20023 else
20025 par = emit_insn (tmp);
20026 REG_NOTES (par) = dwarf;
20027 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20028 stack_pointer_rtx, stack_pointer_rtx);
20032 else if ((num_regs % 2) == 1 && return_in_pc)
20034 /* There are 2 registers to be popped. So, generate the pattern
20035 pop_multiple_with_stack_update_and_return to pop in PC. */
20036 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20039 return;
20042 /* LDRD in ARM mode needs consecutive registers as operands. This function
20043 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20044 offset addressing and then generates one separate stack update. This provides
20045 more scheduling freedom, compared to writeback on every load. However,
20046 if the function returns using load into PC directly
20047 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20048 before the last load. TODO: Add a peephole optimization to recognize
20049 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20050 peephole optimization to merge the load at stack-offset zero
20051 with the stack update instruction using load with writeback
20052 in post-index addressing mode. */
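/* For illustration only: assuming SAVED_REGS_MASK = {r4, r5, r6, r7, lr},
   the code below would emit roughly

       ldrd    r4, r5, [sp]
       ldrd    r6, r7, [sp, #8]
       ldr     lr, [sp, #16]
       add     sp, sp, #20

   i.e. offset-addressed loads followed by one explicit stack update.  */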
20053 static void
20054 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20056 int j = 0;
20057 int offset = 0;
20058 rtx par = NULL_RTX;
20059 rtx dwarf = NULL_RTX;
20060 rtx tmp, mem;
20062 /* Restore saved registers. */
20063 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20064 j = 0;
20065 while (j <= LAST_ARM_REGNUM)
20066 if (saved_regs_mask & (1 << j))
20068 if ((j % 2) == 0
20069 && (saved_regs_mask & (1 << (j + 1)))
20070 && (j + 1) != PC_REGNUM)
20072 /* Current register and next register form register pair for which
20073 LDRD can be generated. PC is always the last register popped, and
20074 we handle it separately. */
20075 if (offset > 0)
20076 mem = gen_frame_mem (DImode,
20077 plus_constant (Pmode,
20078 stack_pointer_rtx,
20079 offset));
20080 else
20081 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20083 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20084 tmp = emit_insn (tmp);
20085 RTX_FRAME_RELATED_P (tmp) = 1;
20087 /* Generate dwarf info. */
20089 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20090 gen_rtx_REG (SImode, j),
20091 NULL_RTX);
20092 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20093 gen_rtx_REG (SImode, j + 1),
20094 dwarf);
20096 REG_NOTES (tmp) = dwarf;
20098 offset += 8;
20099 j += 2;
20101 else if (j != PC_REGNUM)
20103 /* Emit a single word load. */
20104 if (offset > 0)
20105 mem = gen_frame_mem (SImode,
20106 plus_constant (Pmode,
20107 stack_pointer_rtx,
20108 offset));
20109 else
20110 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20112 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20113 tmp = emit_insn (tmp);
20114 RTX_FRAME_RELATED_P (tmp) = 1;
20116 /* Generate dwarf info. */
20117 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20118 gen_rtx_REG (SImode, j),
20119 NULL_RTX);
20121 offset += 4;
20122 j += 1;
20124 else /* j == PC_REGNUM */
20125 j++;
20127 else
20128 j++;
20130 /* Update the stack. */
20131 if (offset > 0)
20133 tmp = gen_rtx_SET (Pmode,
20134 stack_pointer_rtx,
20135 plus_constant (Pmode,
20136 stack_pointer_rtx,
20137 offset));
20138 tmp = emit_insn (tmp);
20139 arm_add_cfa_adjust_cfa_note (tmp, offset,
20140 stack_pointer_rtx, stack_pointer_rtx);
20141 offset = 0;
20144 if (saved_regs_mask & (1 << PC_REGNUM))
20146 /* Only PC is to be popped. */
20147 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20148 XVECEXP (par, 0, 0) = ret_rtx;
20149 tmp = gen_rtx_SET (SImode,
20150 gen_rtx_REG (SImode, PC_REGNUM),
20151 gen_frame_mem (SImode,
20152 gen_rtx_POST_INC (SImode,
20153 stack_pointer_rtx)));
20154 RTX_FRAME_RELATED_P (tmp) = 1;
20155 XVECEXP (par, 0, 1) = tmp;
20156 par = emit_jump_insn (par);
20158 /* Generate dwarf info. */
20159 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20160 gen_rtx_REG (SImode, PC_REGNUM),
20161 NULL_RTX);
20162 REG_NOTES (par) = dwarf;
20163 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20164 stack_pointer_rtx, stack_pointer_rtx);
20168 /* Calculate the size of the return value that is passed in registers. */
20169 static unsigned
20170 arm_size_return_regs (void)
20172 enum machine_mode mode;
20174 if (crtl->return_rtx != 0)
20175 mode = GET_MODE (crtl->return_rtx);
20176 else
20177 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20179 return GET_MODE_SIZE (mode);
20182 /* Return true if the current function needs to save/restore LR. */
20183 static bool
20184 thumb_force_lr_save (void)
20186 return !cfun->machine->lr_save_eliminated
20187 && (!leaf_function_p ()
20188 || thumb_far_jump_used_p ()
20189 || df_regs_ever_live_p (LR_REGNUM));
20192 /* We do not know if r3 will be available because
20193 we do have an indirect tailcall happening in this
20194 particular case. */
20195 static bool
20196 is_indirect_tailcall_p (rtx call)
20198 rtx pat = PATTERN (call);
20200 /* Indirect tail call. */
20201 pat = XVECEXP (pat, 0, 0);
20202 if (GET_CODE (pat) == SET)
20203 pat = SET_SRC (pat);
20205 pat = XEXP (XEXP (pat, 0), 0);
20206 return REG_P (pat);
20209 /* Return true if r3 is used by any of the tail call insns in the
20210 current function. */
20211 static bool
20212 any_sibcall_could_use_r3 (void)
20214 edge_iterator ei;
20215 edge e;
20217 if (!crtl->tail_call_emit)
20218 return false;
20219 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20220 if (e->flags & EDGE_SIBCALL)
20222 rtx call = BB_END (e->src);
20223 if (!CALL_P (call))
20224 call = prev_nonnote_nondebug_insn (call);
20225 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20226 if (find_regno_fusage (call, USE, 3)
20227 || is_indirect_tailcall_p (call))
20228 return true;
20230 return false;
20234 /* Compute the distance from register FROM to register TO.
20235 These can be the arg pointer (26), the soft frame pointer (25),
20236 the stack pointer (13) or the hard frame pointer (11).
20237 In thumb mode r7 is used as the soft frame pointer, if needed.
20238 Typical stack layout looks like this:
20240          old stack pointer -> |    |
20241                                ----
20242                               |    | \
20243                               |    |   saved arguments for
20244                               |    |   vararg functions
20245                               |    | /
20247      hard FP & arg pointer -> |    | \
20248                               |    |   stack
20249                               |    |   frame
20250                               |    | /
20252                               |    | \
20253                               |    |   call saved
20254                               |    |   registers
20255         soft frame pointer -> |    | /
20257                               |    | \
20258                               |    |   local
20259                               |    |   variables
20260        locals base pointer -> |    | /
20262                               |    | \
20263                               |    |   outgoing
20264                               |    |   arguments
20265      current stack pointer -> |    | /
20268 For a given function some or all of these stack components
20269 may not be needed, giving rise to the possibility of
20270 eliminating some of the registers.
20272 The values returned by this function must reflect the behavior
20273 of arm_expand_prologue() and arm_compute_save_reg_mask().
20275 The sign of the number returned reflects the direction of stack
20276 growth, so the values are positive for all eliminations except
20277 from the soft frame pointer to the hard frame pointer.
20279 SFP may point just inside the local variables block to ensure correct
20280 alignment. */
20283 /* Calculate stack offsets. These are used to calculate register elimination
20284 offsets and in prologue/epilogue code. Also calculates which registers
20285 should be saved. */
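/* Worked example (illustrative only; assumes no varargs, no static chain,
   no frame pointer, CALLER_INTERWORKING_SLOT_SIZE of 0, saved core
   registers {r4, r5, r6, lr}, 8 bytes of locals and no outgoing args):
     saved_args    = 0
     frame         = 0
     saved_regs    = 16
     soft_frame    = 16   (already doubleword aligned)
     locals_base   = 24
     outgoing_args = 24   (SP stays doubleword aligned).  */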
20287 static arm_stack_offsets *
20288 arm_get_frame_offsets (void)
20290 struct arm_stack_offsets *offsets;
20291 unsigned long func_type;
20292 int leaf;
20293 int saved;
20294 int core_saved;
20295 HOST_WIDE_INT frame_size;
20296 int i;
20298 offsets = &cfun->machine->stack_offsets;
20300 /* We need to know if we are a leaf function. Unfortunately, it
20301 is possible to be called after start_sequence has been called,
20302 which causes get_insns to return the insns for the sequence,
20303 not the function, which will cause leaf_function_p to return
20304 the incorrect result.
20306 We only need to know about leaf functions once reload has completed, and the
20307 frame size cannot be changed after that time, so we can safely
20308 use the cached value. */
20310 if (reload_completed)
20311 return offsets;
20313 /* Initially this is the size of the local variables. It will be translated
20314 into an offset once we have determined the size of preceding data. */
20315 frame_size = ROUND_UP_WORD (get_frame_size ());
20317 leaf = leaf_function_p ();
20319 /* Space for variadic functions. */
20320 offsets->saved_args = crtl->args.pretend_args_size;
20322 /* In Thumb mode this is incorrect, but never used. */
20323 offsets->frame
20324 = (offsets->saved_args
20325 + arm_compute_static_chain_stack_bytes ()
20326 + (frame_pointer_needed ? 4 : 0));
20328 if (TARGET_32BIT)
20330 unsigned int regno;
20332 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20333 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20334 saved = core_saved;
20336 /* We know that SP will be doubleword aligned on entry, and we must
20337 preserve that condition at any subroutine call. We also require the
20338 soft frame pointer to be doubleword aligned. */
20340 if (TARGET_REALLY_IWMMXT)
20342 /* Check for the call-saved iWMMXt registers. */
20343 for (regno = FIRST_IWMMXT_REGNUM;
20344 regno <= LAST_IWMMXT_REGNUM;
20345 regno++)
20346 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20347 saved += 8;
20350 func_type = arm_current_func_type ();
20351 /* Space for saved VFP registers. */
20352 if (! IS_VOLATILE (func_type)
20353 && TARGET_HARD_FLOAT && TARGET_VFP)
20354 saved += arm_get_vfp_saved_size ();
20356 else /* TARGET_THUMB1 */
20358 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20359 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20360 saved = core_saved;
20361 if (TARGET_BACKTRACE)
20362 saved += 16;
20365 /* Saved registers include the stack frame. */
20366 offsets->saved_regs
20367 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20368 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20370 /* A leaf function does not need any stack alignment if it has nothing
20371 on the stack. */
20372 if (leaf && frame_size == 0
20373 /* However if it calls alloca(), we have a dynamically allocated
20374 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20375 && ! cfun->calls_alloca)
20377 offsets->outgoing_args = offsets->soft_frame;
20378 offsets->locals_base = offsets->soft_frame;
20379 return offsets;
20382 /* Ensure SFP has the correct alignment. */
20383 if (ARM_DOUBLEWORD_ALIGN
20384 && (offsets->soft_frame & 7))
20386 offsets->soft_frame += 4;
20387 /* Try to align stack by pushing an extra reg. Don't bother doing this
20388 when there is a stack frame as the alignment will be rolled into
20389 the normal stack adjustment. */
20390 if (frame_size + crtl->outgoing_args_size == 0)
20392 int reg = -1;
20394 /* If it is safe to use r3, then do so. This sometimes
20395 generates better code on Thumb-2 by avoiding the need to
20396 use 32-bit push/pop instructions. */
20397 if (! any_sibcall_could_use_r3 ()
20398 && arm_size_return_regs () <= 12
20399 && (offsets->saved_regs_mask & (1 << 3)) == 0
20400 && (TARGET_THUMB2
20401 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20403 reg = 3;
20405 else
20406 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20408 /* Avoid fixed registers; they may be changed at
20409 arbitrary times so it's unsafe to restore them
20410 during the epilogue. */
20411 if (!fixed_regs[i]
20412 && (offsets->saved_regs_mask & (1 << i)) == 0)
20414 reg = i;
20415 break;
20419 if (reg != -1)
20421 offsets->saved_regs += 4;
20422 offsets->saved_regs_mask |= (1 << reg);
20427 offsets->locals_base = offsets->soft_frame + frame_size;
20428 offsets->outgoing_args = (offsets->locals_base
20429 + crtl->outgoing_args_size);
20431 if (ARM_DOUBLEWORD_ALIGN)
20433 /* Ensure SP remains doubleword aligned. */
20434 if (offsets->outgoing_args & 7)
20435 offsets->outgoing_args += 4;
20436 gcc_assert (!(offsets->outgoing_args & 7));
20439 return offsets;
20443 /* Calculate the relative offsets for the different stack pointers. Positive
20444 offsets are in the direction of stack growth. */
20446 HOST_WIDE_INT
20447 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20449 arm_stack_offsets *offsets;
20451 offsets = arm_get_frame_offsets ();
20453 /* OK, now we have enough information to compute the distances.
20454 There must be an entry in these switch tables for each pair
20455 of registers in ELIMINABLE_REGS, even if some of the entries
20456 seem to be redundant or useless. */
20457 switch (from)
20459 case ARG_POINTER_REGNUM:
20460 switch (to)
20462 case THUMB_HARD_FRAME_POINTER_REGNUM:
20463 return 0;
20465 case FRAME_POINTER_REGNUM:
20466 /* This is the reverse of the soft frame pointer
20467 to hard frame pointer elimination below. */
20468 return offsets->soft_frame - offsets->saved_args;
20470 case ARM_HARD_FRAME_POINTER_REGNUM:
20471 /* This is only non-zero in the case where the static chain register
20472 is stored above the frame. */
20473 return offsets->frame - offsets->saved_args - 4;
20475 case STACK_POINTER_REGNUM:
20476 /* If nothing has been pushed on the stack at all
20477 then this will return -4. This *is* correct! */
20478 return offsets->outgoing_args - (offsets->saved_args + 4);
20480 default:
20481 gcc_unreachable ();
20483 gcc_unreachable ();
20485 case FRAME_POINTER_REGNUM:
20486 switch (to)
20488 case THUMB_HARD_FRAME_POINTER_REGNUM:
20489 return 0;
20491 case ARM_HARD_FRAME_POINTER_REGNUM:
20492 /* The hard frame pointer points to the top entry in the
20493 stack frame. The soft frame pointer points to the bottom entry
20494 in the stack frame. If there is no stack frame at all,
20495 then they are identical. */
20497 return offsets->frame - offsets->soft_frame;
20499 case STACK_POINTER_REGNUM:
20500 return offsets->outgoing_args - offsets->soft_frame;
20502 default:
20503 gcc_unreachable ();
20505 gcc_unreachable ();
20507 default:
20508 /* You cannot eliminate from the stack pointer.
20509 In theory you could eliminate from the hard frame
20510 pointer to the stack pointer, but this will never
20511 happen, since if a stack frame is not needed the
20512 hard frame pointer will never be used. */
20513 gcc_unreachable ();
20517 /* Given FROM and TO register numbers, say whether this elimination is
20518 allowed. Frame pointer elimination is automatically handled.
20520 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20521 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20522 pointer, we must eliminate FRAME_POINTER_REGNUM into
20523 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20524 ARG_POINTER_REGNUM. */
20526 bool
20527 arm_can_eliminate (const int from, const int to)
20529 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20530 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20531 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20532 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20533 true);
20536 /* Emit RTL to save coprocessor registers on function entry. Returns the
20537 number of bytes pushed. */
20539 static int
20540 arm_save_coproc_regs(void)
20542 int saved_size = 0;
20543 unsigned reg;
20544 unsigned start_reg;
20545 rtx insn;
20547 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20548 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20550 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20551 insn = gen_rtx_MEM (V2SImode, insn);
20552 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20553 RTX_FRAME_RELATED_P (insn) = 1;
20554 saved_size += 8;
20557 if (TARGET_HARD_FLOAT && TARGET_VFP)
20559 start_reg = FIRST_VFP_REGNUM;
20561 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20563 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20564 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20566 if (start_reg != reg)
20567 saved_size += vfp_emit_fstmd (start_reg,
20568 (reg - start_reg) / 2);
20569 start_reg = reg + 2;
20572 if (start_reg != reg)
20573 saved_size += vfp_emit_fstmd (start_reg,
20574 (reg - start_reg) / 2);
20576 return saved_size;
20580 /* Set the Thumb frame pointer from the stack pointer. */
20582 static void
20583 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20585 HOST_WIDE_INT amount;
20586 rtx insn, dwarf;
20588 amount = offsets->outgoing_args - offsets->locals_base;
20589 if (amount < 1024)
20590 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20591 stack_pointer_rtx, GEN_INT (amount)));
20592 else
20594 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20595 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20596 expects the first two operands to be the same. */
20597 if (TARGET_THUMB2)
20599 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20600 stack_pointer_rtx,
20601 hard_frame_pointer_rtx));
20603 else
20605 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20606 hard_frame_pointer_rtx,
20607 stack_pointer_rtx));
20609 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20610 plus_constant (Pmode, stack_pointer_rtx, amount));
20611 RTX_FRAME_RELATED_P (dwarf) = 1;
20612 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20615 RTX_FRAME_RELATED_P (insn) = 1;
20618 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20619 function. */
20620 void
20621 arm_expand_prologue (void)
20623 rtx amount;
20624 rtx insn;
20625 rtx ip_rtx;
20626 unsigned long live_regs_mask;
20627 unsigned long func_type;
20628 int fp_offset = 0;
20629 int saved_pretend_args = 0;
20630 int saved_regs = 0;
20631 unsigned HOST_WIDE_INT args_to_push;
20632 arm_stack_offsets *offsets;
20634 func_type = arm_current_func_type ();
20636 /* Naked functions don't have prologues. */
20637 if (IS_NAKED (func_type))
20638 return;
20640 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20641 args_to_push = crtl->args.pretend_args_size;
20643 /* Compute which registers we will have to save onto the stack. */
20644 offsets = arm_get_frame_offsets ();
20645 live_regs_mask = offsets->saved_regs_mask;
20647 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20649 if (IS_STACKALIGN (func_type))
20651 rtx r0, r1;
20653 /* Handle a word-aligned stack pointer. We generate the following:
20655 mov r0, sp
20656 bic r1, r0, #7
20657 mov sp, r1
20658 <save and restore r0 in normal prologue/epilogue>
20659 mov sp, r0
20660 bx lr
20662 The unwinder doesn't need to know about the stack realignment.
20663 Just tell it we saved SP in r0. */
20664 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20666 r0 = gen_rtx_REG (SImode, 0);
20667 r1 = gen_rtx_REG (SImode, 1);
20669 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20670 RTX_FRAME_RELATED_P (insn) = 1;
20671 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20673 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20675 /* ??? The CFA changes here, which may cause GDB to conclude that it
20676 has entered a different function. That said, the unwind info is
20677 correct, individually, before and after this instruction because
20678 we've described the save of SP, which will override the default
20679 handling of SP as restoring from the CFA. */
20680 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20683 /* For APCS frames, if IP register is clobbered
20684 when creating frame, save that register in a special
20685 way. */
20686 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20688 if (IS_INTERRUPT (func_type))
20690 /* Interrupt functions must not corrupt any registers.
20691 Creating a frame pointer however, corrupts the IP
20692 register, so we must push it first. */
20693 emit_multi_reg_push (1 << IP_REGNUM);
20695 /* Do not set RTX_FRAME_RELATED_P on this insn.
20696 The dwarf stack unwinding code only wants to see one
20697 stack decrement per function, and this is not it. If
20698 this instruction is labeled as being part of the frame
20699 creation sequence then dwarf2out_frame_debug_expr will
20700 die when it encounters the assignment of IP to FP
20701 later on, since the use of SP here establishes SP as
20702 the CFA register and not IP.
20704 Anyway this instruction is not really part of the stack
20705 frame creation although it is part of the prologue. */
20707 else if (IS_NESTED (func_type))
20709 /* The static chain register is the same as the IP register
20710 used as a scratch register during stack frame creation.
20711 To get around this we need to find somewhere to store IP
20712 whilst the frame is being created. We try the following
20713 places in order:
20715 1. The last argument register r3 if it is available.
20716 2. A slot on the stack above the frame if there are no
20717 arguments to push onto the stack.
20718 3. Register r3 again, after pushing the argument registers
20719 onto the stack, if this is a varargs function.
20720 4. The last slot on the stack created for the arguments to
20721 push, if this isn't a varargs function.
20723 Note - we only need to tell the dwarf2 backend about the SP
20724 adjustment in the second variant; the static chain register
20725 doesn't need to be unwound, as it doesn't contain a value
20726 inherited from the caller. */
20728 if (!arm_r3_live_at_start_p ())
20729 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20730 else if (args_to_push == 0)
20732 rtx addr, dwarf;
20734 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20735 saved_regs += 4;
20737 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20738 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20739 fp_offset = 4;
20741 /* Just tell the dwarf backend that we adjusted SP. */
20742 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20743 plus_constant (Pmode, stack_pointer_rtx,
20744 -fp_offset));
20745 RTX_FRAME_RELATED_P (insn) = 1;
20746 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20748 else
20750 /* Store the args on the stack. */
20751 if (cfun->machine->uses_anonymous_args)
20753 insn
20754 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf);
20755 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20756 saved_pretend_args = 1;
20758 else
20760 rtx addr, dwarf;
20762 if (args_to_push == 4)
20763 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20764 else
20765 addr
20766 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
20767 plus_constant (Pmode,
20768 stack_pointer_rtx,
20769 -args_to_push));
20771 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20773 /* Just tell the dwarf backend that we adjusted SP. */
20774 dwarf
20775 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20776 plus_constant (Pmode, stack_pointer_rtx,
20777 -args_to_push));
20778 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20781 RTX_FRAME_RELATED_P (insn) = 1;
20782 fp_offset = args_to_push;
20783 args_to_push = 0;
20787 insn = emit_set_insn (ip_rtx,
20788 plus_constant (Pmode, stack_pointer_rtx,
20789 fp_offset));
20790 RTX_FRAME_RELATED_P (insn) = 1;
20793 if (args_to_push)
20795 /* Push the argument registers, or reserve space for them. */
20796 if (cfun->machine->uses_anonymous_args)
20797 insn = emit_multi_reg_push
20798 ((0xf0 >> (args_to_push / 4)) & 0xf);
20799 else
20800 insn = emit_insn
20801 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20802 GEN_INT (- args_to_push)));
20803 RTX_FRAME_RELATED_P (insn) = 1;
20806 /* If this is an interrupt service routine, and the link register
20807 is going to be pushed, and we're not generating extra
20808 push of IP (needed when a frame is needed and the frame layout is APCS),
20809 subtracting four from LR now will mean that the function return
20810 can be done with a single instruction. */
20811 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
20812 && (live_regs_mask & (1 << LR_REGNUM)) != 0
20813 && !(frame_pointer_needed && TARGET_APCS_FRAME)
20814 && TARGET_ARM)
20816 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
20818 emit_set_insn (lr, plus_constant (SImode, lr, -4));
20821 if (live_regs_mask)
20823 saved_regs += bit_count (live_regs_mask) * 4;
20824 if (optimize_size && !frame_pointer_needed
20825 && saved_regs == offsets->saved_regs - offsets->saved_args)
20827 /* If no coprocessor registers are being pushed and we don't have
20828 to worry about a frame pointer then push extra registers to
20829 create the stack frame. This is done in a way that does not
20830 alter the frame layout, so is independent of the epilogue. */
20831 int n;
20832 int frame;
20833 n = 0;
20834 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
20835 n++;
20836 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
20837 if (frame && n * 4 >= frame)
20839 n = frame / 4;
20840 live_regs_mask |= (1 << n) - 1;
20841 saved_regs += frame;
20845 if (TARGET_LDRD
20846 && current_tune->prefer_ldrd_strd
20847 && !optimize_function_for_size_p (cfun))
20849 if (TARGET_THUMB2)
20851 thumb2_emit_strd_push (live_regs_mask);
20853 else if (TARGET_ARM
20854 && !TARGET_APCS_FRAME
20855 && !IS_INTERRUPT (func_type))
20857 arm_emit_strd_push (live_regs_mask);
20859 else
20861 insn = emit_multi_reg_push (live_regs_mask);
20862 RTX_FRAME_RELATED_P (insn) = 1;
20865 else
20867 insn = emit_multi_reg_push (live_regs_mask);
20868 RTX_FRAME_RELATED_P (insn) = 1;
20872 if (! IS_VOLATILE (func_type))
20873 saved_regs += arm_save_coproc_regs ();
20875 if (frame_pointer_needed && TARGET_ARM)
20877 /* Create the new frame pointer. */
20878 if (TARGET_APCS_FRAME)
20880 insn = GEN_INT (-(4 + args_to_push + fp_offset));
20881 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
20882 RTX_FRAME_RELATED_P (insn) = 1;
20884 if (IS_NESTED (func_type))
20886 /* Recover the static chain register. */
20887 if (!arm_r3_live_at_start_p () || saved_pretend_args)
20888 insn = gen_rtx_REG (SImode, 3);
20889 else
20891 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
20892 insn = gen_frame_mem (SImode, insn);
20894 emit_set_insn (ip_rtx, insn);
20895 /* Add a USE to stop propagate_one_insn() from barfing. */
20896 emit_insn (gen_force_register_use (ip_rtx));
20899 else
20901 insn = GEN_INT (saved_regs - 4);
20902 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20903 stack_pointer_rtx, insn));
20904 RTX_FRAME_RELATED_P (insn) = 1;
20908 if (flag_stack_usage_info)
20909 current_function_static_stack_size
20910 = offsets->outgoing_args - offsets->saved_args;
20912 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
20914 /* This add can produce multiple insns for a large constant, so we
20915 need to get tricky. */
20916 rtx last = get_last_insn ();
20918 amount = GEN_INT (offsets->saved_args + saved_regs
20919 - offsets->outgoing_args);
20921 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20922 amount));
20925 last = last ? NEXT_INSN (last) : get_insns ();
20926 RTX_FRAME_RELATED_P (last) = 1;
20928 while (last != insn);
20930 /* If the frame pointer is needed, emit a special barrier that
20931 will prevent the scheduler from moving stores to the frame
20932 before the stack adjustment. */
20933 if (frame_pointer_needed)
20934 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
20935 hard_frame_pointer_rtx));
20939 if (frame_pointer_needed && TARGET_THUMB2)
20940 thumb_set_frame_pointer (offsets);
20942 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20944 unsigned long mask;
20946 mask = live_regs_mask;
20947 mask &= THUMB2_WORK_REGS;
20948 if (!IS_NESTED (func_type))
20949 mask |= (1 << IP_REGNUM);
20950 arm_load_pic_register (mask);
20953 /* If we are profiling, make sure no instructions are scheduled before
20954 the call to mcount. Similarly if the user has requested no
20955 scheduling in the prolog. Similarly if we want non-call exceptions
20956 using the EABI unwinder, to prevent faulting instructions from being
20957 swapped with a stack adjustment. */
20958 if (crtl->profile || !TARGET_SCHED_PROLOG
20959 || (arm_except_unwind_info (&global_options) == UI_TARGET
20960 && cfun->can_throw_non_call_exceptions))
20961 emit_insn (gen_blockage ());
20963 /* If the link register is being kept alive, with the return address in it,
20964 then make sure that it does not get reused by the ce2 pass. */
20965 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
20966 cfun->machine->lr_save_eliminated = 1;
20969 /* Print condition code to STREAM. Helper function for arm_print_operand. */
20970 static void
20971 arm_print_condition (FILE *stream)
20973 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
20975 /* Branch conversion is not implemented for Thumb-2. */
20976 if (TARGET_THUMB)
20978 output_operand_lossage ("predicated Thumb instruction");
20979 return;
20981 if (current_insn_predicate != NULL)
20983 output_operand_lossage
20984 ("predicated instruction in conditional sequence");
20985 return;
20988 fputs (arm_condition_codes[arm_current_cc], stream);
20990 else if (current_insn_predicate)
20992 enum arm_cond_code code;
20994 if (TARGET_THUMB1)
20996 output_operand_lossage ("predicated Thumb instruction");
20997 return;
21000 code = get_arm_condition_code (current_insn_predicate);
21001 fputs (arm_condition_codes[code], stream);
21006 /* If CODE is 'd', then X is a condition operand and the instruction
21007 should only be executed if the condition is true.
21008 If CODE is 'D', then X is a condition operand and the instruction
21009 should only be executed if the condition is false: however, if the mode
21010 of the comparison is CCFPEmode, then always execute the instruction -- we
21011 do this because in these circumstances !GE does not necessarily imply LT;
21012 in these cases the instruction pattern will take care to make sure that
21013 an instruction containing %d will follow, thereby undoing the effects of
21014 doing this instruction unconditionally.
21015 If CODE is 'N' then X is a floating point operand that must be negated
21016 before output.
21017 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21018 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21019 static void
21020 arm_print_operand (FILE *stream, rtx x, int code)
21022 switch (code)
21024 case '@':
21025 fputs (ASM_COMMENT_START, stream);
21026 return;
21028 case '_':
21029 fputs (user_label_prefix, stream);
21030 return;
21032 case '|':
21033 fputs (REGISTER_PREFIX, stream);
21034 return;
21036 case '?':
21037 arm_print_condition (stream);
21038 return;
21040 case '(':
21041 /* Nothing in unified syntax, otherwise the current condition code. */
21042 if (!TARGET_UNIFIED_ASM)
21043 arm_print_condition (stream);
21044 break;
21046 case ')':
21047 /* The current condition code in unified syntax, otherwise nothing. */
21048 if (TARGET_UNIFIED_ASM)
21049 arm_print_condition (stream);
21050 break;
21052 case '.':
21053 /* The current condition code for a condition code setting instruction.
21054 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21055 if (TARGET_UNIFIED_ASM)
21057 fputc('s', stream);
21058 arm_print_condition (stream);
21060 else
21062 arm_print_condition (stream);
21063 fputc('s', stream);
21065 return;
21067 case '!':
21068 /* If the instruction is conditionally executed then print
21069 the current condition code, otherwise print 's'. */
21070 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21071 if (current_insn_predicate)
21072 arm_print_condition (stream);
21073 else
21074 fputc('s', stream);
21075 break;
21077 /* %# is a "break" sequence. It doesn't output anything, but is used to
21078 separate e.g. operand numbers from following text, if that text consists
21079 of further digits which we don't want to be part of the operand
21080 number. */
21081 case '#':
21082 return;
21084 case 'N':
21086 REAL_VALUE_TYPE r;
21087 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21088 r = real_value_negate (&r);
21089 fprintf (stream, "%s", fp_const_from_val (&r));
21091 return;
21093 /* An integer or symbol address without a preceding # sign. */
21094 case 'c':
21095 switch (GET_CODE (x))
21097 case CONST_INT:
21098 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21099 break;
21101 case SYMBOL_REF:
21102 output_addr_const (stream, x);
21103 break;
21105 case CONST:
21106 if (GET_CODE (XEXP (x, 0)) == PLUS
21107 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21109 output_addr_const (stream, x);
21110 break;
21112 /* Fall through. */
21114 default:
21115 output_operand_lossage ("Unsupported operand for code '%c'", code);
21117 return;
21119 /* An integer that we want to print in HEX. */
21120 case 'x':
21121 switch (GET_CODE (x))
21123 case CONST_INT:
21124 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21125 break;
21127 default:
21128 output_operand_lossage ("Unsupported operand for code '%c'", code);
21130 return;
21132 case 'B':
21133 if (CONST_INT_P (x))
21135 HOST_WIDE_INT val;
21136 val = ARM_SIGN_EXTEND (~INTVAL (x));
21137 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21139 else
21141 putc ('~', stream);
21142 output_addr_const (stream, x);
21144 return;
21146 case 'L':
21147 /* The low 16 bits of an immediate constant. */
21148 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21149 return;
21151 case 'i':
21152 fprintf (stream, "%s", arithmetic_instr (x, 1));
21153 return;
21155 case 'I':
21156 fprintf (stream, "%s", arithmetic_instr (x, 0));
21157 return;
21159 case 'S':
21161 HOST_WIDE_INT val;
21162 const char *shift;
21164 shift = shift_op (x, &val);
21166 if (shift)
21168 fprintf (stream, ", %s ", shift);
21169 if (val == -1)
21170 arm_print_operand (stream, XEXP (x, 1), 0);
21171 else
21172 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21175 return;
21177 /* An explanation of the 'Q', 'R' and 'H' register operands:
21179 In a pair of registers containing a DI or DF value the 'Q'
21180 operand returns the register number of the register containing
21181 the least significant part of the value. The 'R' operand returns
21182 the register number of the register containing the most
21183 significant part of the value.
21185 The 'H' operand returns the higher of the two register numbers.
21186 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21187 same as the 'Q' operand, since the most significant part of the
21188 value is held in the lower number register. The reverse is true
21189 on systems where WORDS_BIG_ENDIAN is false.
21191 The purpose of these operands is to distinguish between cases
21192 where the endian-ness of the values is important (for example
21193 when they are added together), and cases where the endian-ness
21194 is irrelevant, but the order of register operations is important.
21195 For example when loading a value from memory into a register
21196 pair, the endian-ness does not matter. Provided that the value
21197 from the lower memory address is put into the lower numbered
21198 register, and the value from the higher address is put into the
21199 higher numbered register, the load will work regardless of whether
21200 the value being loaded is big-wordian or little-wordian. The
21201 order of the two register loads can matter however, if the address
21202 of the memory location is actually held in one of the registers
21203 being overwritten by the load.
21205 The 'Q' and 'R' constraints are also available for 64-bit
21206 constants. */
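    /* As an illustration of the code below (not part of the original
       comment): for a DImode value held in {r0, r1} with WORDS_BIG_ENDIAN
       false, %Q prints r0, %R prints r1 and %H prints r1; with
       WORDS_BIG_ENDIAN true, %Q and %H both print r1 while %R prints r0.  */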
21207 case 'Q':
21208 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21210 rtx part = gen_lowpart (SImode, x);
21211 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21212 return;
21215 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21217 output_operand_lossage ("invalid operand for code '%c'", code);
21218 return;
21221 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21222 return;
21224 case 'R':
21225 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21227 enum machine_mode mode = GET_MODE (x);
21228 rtx part;
21230 if (mode == VOIDmode)
21231 mode = DImode;
21232 part = gen_highpart_mode (SImode, mode, x);
21233 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21234 return;
21237 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21239 output_operand_lossage ("invalid operand for code '%c'", code);
21240 return;
21243 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21244 return;
21246 case 'H':
21247 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21249 output_operand_lossage ("invalid operand for code '%c'", code);
21250 return;
21253 asm_fprintf (stream, "%r", REGNO (x) + 1);
21254 return;
21256 case 'J':
21257 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21259 output_operand_lossage ("invalid operand for code '%c'", code);
21260 return;
21263 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21264 return;
21266 case 'K':
21267 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21269 output_operand_lossage ("invalid operand for code '%c'", code);
21270 return;
21273 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21274 return;
21276 case 'm':
21277 asm_fprintf (stream, "%r",
21278 REG_P (XEXP (x, 0))
21279 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21280 return;
21282 case 'M':
21283 asm_fprintf (stream, "{%r-%r}",
21284 REGNO (x),
21285 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21286 return;
21288 /* Like 'M', but writing doubleword vector registers, for use by Neon
21289 insns. */
21290 case 'h':
21292 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21293 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21294 if (numregs == 1)
21295 asm_fprintf (stream, "{d%d}", regno);
21296 else
21297 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21299 return;
21301 case 'd':
21302 /* CONST_TRUE_RTX means always -- that's the default. */
21303 if (x == const_true_rtx)
21304 return;
21306 if (!COMPARISON_P (x))
21308 output_operand_lossage ("invalid operand for code '%c'", code);
21309 return;
21312 fputs (arm_condition_codes[get_arm_condition_code (x)],
21313 stream);
21314 return;
21316 case 'D':
21317 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21318 want to do that. */
21319 if (x == const_true_rtx)
21321 output_operand_lossage ("instruction never executed");
21322 return;
21324 if (!COMPARISON_P (x))
21326 output_operand_lossage ("invalid operand for code '%c'", code);
21327 return;
21330 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21331 (get_arm_condition_code (x))],
21332 stream);
21333 return;
21335 case 's':
21336 case 'V':
21337 case 'W':
21338 case 'X':
21339 case 'Y':
21340 case 'Z':
21341 /* Former Maverick support, removed after GCC-4.7. */
21342 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21343 return;
21345 case 'U':
21346 if (!REG_P (x)
21347 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21348 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21349 /* Bad value for wCG register number. */
21351 output_operand_lossage ("invalid operand for code '%c'", code);
21352 return;
21355 else
21356 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21357 return;
21359 /* Print an iWMMXt control register name. */
21360 case 'w':
21361 if (!CONST_INT_P (x)
21362 || INTVAL (x) < 0
21363 || INTVAL (x) >= 16)
21364 /* Bad value for wC register number. */
21366 output_operand_lossage ("invalid operand for code '%c'", code);
21367 return;
21370 else
21372 static const char * wc_reg_names [16] =
21374 "wCID", "wCon", "wCSSF", "wCASF",
21375 "wC4", "wC5", "wC6", "wC7",
21376 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21377 "wC12", "wC13", "wC14", "wC15"
21380 fputs (wc_reg_names [INTVAL (x)], stream);
21382 return;
21384 /* Print the high single-precision register of a VFP double-precision
21385 register. */
21386 case 'p':
21388 int mode = GET_MODE (x);
21389 int regno;
21391 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21393 output_operand_lossage ("invalid operand for code '%c'", code);
21394 return;
21397 regno = REGNO (x);
21398 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21400 output_operand_lossage ("invalid operand for code '%c'", code);
21401 return;
21404 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21406 return;
21408 /* Print a VFP/Neon double precision or quad precision register name. */
21409 case 'P':
21410 case 'q':
21412 int mode = GET_MODE (x);
21413 int is_quad = (code == 'q');
21414 int regno;
21416 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21418 output_operand_lossage ("invalid operand for code '%c'", code);
21419 return;
21422 if (!REG_P (x)
21423 || !IS_VFP_REGNUM (REGNO (x)))
21425 output_operand_lossage ("invalid operand for code '%c'", code);
21426 return;
21429 regno = REGNO (x);
21430 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21431 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21433 output_operand_lossage ("invalid operand for code '%c'", code);
21434 return;
21437 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21438 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21440 return;
21442 /* These two codes print the low/high doubleword register of a Neon quad
21443 register, respectively. For pair-structure types, can also print
21444 low/high quadword registers. */
21445 case 'e':
21446 case 'f':
21448 int mode = GET_MODE (x);
21449 int regno;
21451 if ((GET_MODE_SIZE (mode) != 16
21452 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21454 output_operand_lossage ("invalid operand for code '%c'", code);
21455 return;
21458 regno = REGNO (x);
21459 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21461 output_operand_lossage ("invalid operand for code '%c'", code);
21462 return;
21465 if (GET_MODE_SIZE (mode) == 16)
21466 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21467 + (code == 'f' ? 1 : 0));
21468 else
21469 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21470 + (code == 'f' ? 1 : 0));
21472 return;
21474 /* Print a VFPv3 floating-point constant, represented as an integer
21475 index. */
21476 case 'G':
21478 int index = vfp3_const_double_index (x);
21479 gcc_assert (index != -1);
21480 fprintf (stream, "%d", index);
21482 return;
21484 /* Print bits representing opcode features for Neon.
21486 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21487 and polynomials as unsigned.
21489 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21491 Bit 2 is 1 for rounding functions, 0 otherwise. */
21493 /* Identify the type as 's', 'u', 'p' or 'f'. */
21494 case 'T':
21496 HOST_WIDE_INT bits = INTVAL (x);
21497 fputc ("uspf"[bits & 3], stream);
21499 return;
21501 /* Likewise, but signed and unsigned integers are both 'i'. */
21502 case 'F':
21504 HOST_WIDE_INT bits = INTVAL (x);
21505 fputc ("iipf"[bits & 3], stream);
21507 return;
21509 /* As for 'T', but emit 'u' instead of 'p'. */
21510 case 't':
21512 HOST_WIDE_INT bits = INTVAL (x);
21513 fputc ("usuf"[bits & 3], stream);
21515 return;
21517 /* Bit 2: rounding (vs none). */
21518 case 'O':
21520 HOST_WIDE_INT bits = INTVAL (x);
21521 fputs ((bits & 4) != 0 ? "r" : "", stream);
21523 return;
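/* Worked example for the feature-bit codes above (illustrative only): an
   operand with INTVAL 5 (binary 101: signed, integer, rounding) makes 'T'
   and 't' print 's', 'F' print 'i' and 'O' print 'r'; INTVAL 2 (polynomial,
   no rounding) makes 'T' print 'p', 't' print 'u' and 'O' print nothing.  */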
21525 /* Memory operand for vld1/vst1 instruction. */
21526 case 'A':
21528 rtx addr;
21529 bool postinc = FALSE;
21530 unsigned align, memsize, align_bits;
21532 gcc_assert (MEM_P (x));
21533 addr = XEXP (x, 0);
21534 if (GET_CODE (addr) == POST_INC)
21536 postinc = 1;
21537 addr = XEXP (addr, 0);
21539 asm_fprintf (stream, "[%r", REGNO (addr));
21541 /* We know the alignment of this access, so we can emit a hint in the
21542 instruction (for some alignments) as an aid to the memory subsystem
21543 of the target. */
21544 align = MEM_ALIGN (x) >> 3;
21545 memsize = MEM_SIZE (x);
21547 /* Only certain alignment specifiers are supported by the hardware. */
21548 if (memsize == 32 && (align % 32) == 0)
21549 align_bits = 256;
21550 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21551 align_bits = 128;
21552 else if (memsize >= 8 && (align % 8) == 0)
21553 align_bits = 64;
21554 else
21555 align_bits = 0;
21557 if (align_bits != 0)
21558 asm_fprintf (stream, ":%d", align_bits);
21560 asm_fprintf (stream, "]");
21562 if (postinc)
21563 fputs("!", stream);
21565 return;
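/* Worked example (illustrative only): a 16-byte access through r0 that is
   known to be 16-byte aligned prints as "[r0:128]", or "[r0:128]!" for a
   POST_INC address; if the known alignment is below 8 bytes no hint is
   emitted and the operand prints as plain "[r0]".  */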
21567 case 'C':
21569 rtx addr;
21571 gcc_assert (MEM_P (x));
21572 addr = XEXP (x, 0);
21573 gcc_assert (REG_P (addr));
21574 asm_fprintf (stream, "[%r]", REGNO (addr));
21576 return;
21578 /* Translate an S register number into a D register number and element index. */
21579 case 'y':
21581 int mode = GET_MODE (x);
21582 int regno;
21584 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21586 output_operand_lossage ("invalid operand for code '%c'", code);
21587 return;
21590 regno = REGNO (x);
21591 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21593 output_operand_lossage ("invalid operand for code '%c'", code);
21594 return;
21597 regno = regno - FIRST_VFP_REGNUM;
21598 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21600 return;
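/* Worked example (illustrative only): s13, i.e. FIRST_VFP_REGNUM + 13,
   prints as "d6[1]"; s12 prints as "d6[0]".  */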
21602 case 'v':
21603 gcc_assert (CONST_DOUBLE_P (x));
21604 int result;
21605 result = vfp3_const_double_for_fract_bits (x);
21606 if (result == 0)
21607 result = vfp3_const_double_for_bits (x);
21608 fprintf (stream, "#%d", result);
21609 return;
21611 /* Register specifier for vld1.16/vst1.16. Translate the S register
21612 number into a D register number and element index. */
21613 case 'z':
21615 int mode = GET_MODE (x);
21616 int regno;
21618 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21620 output_operand_lossage ("invalid operand for code '%c'", code);
21621 return;
21624 regno = REGNO (x);
21625 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21627 output_operand_lossage ("invalid operand for code '%c'", code);
21628 return;
21631 regno = regno - FIRST_VFP_REGNUM;
21632 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21634 return;
21636 default:
21637 if (x == 0)
21639 output_operand_lossage ("missing operand");
21640 return;
21643 switch (GET_CODE (x))
21645 case REG:
21646 asm_fprintf (stream, "%r", REGNO (x));
21647 break;
21649 case MEM:
21650 output_memory_reference_mode = GET_MODE (x);
21651 output_address (XEXP (x, 0));
21652 break;
21654 case CONST_DOUBLE:
21655 if (TARGET_NEON)
21657 char fpstr[20];
21658 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21659 sizeof (fpstr), 0, 1);
21660 fprintf (stream, "#%s", fpstr);
21662 else
21663 fprintf (stream, "#%s", fp_immediate_constant (x));
21664 break;
21666 default:
21667 gcc_assert (GET_CODE (x) != NEG);
21668 fputc ('#', stream);
21669 if (GET_CODE (x) == HIGH)
21671 fputs (":lower16:", stream);
21672 x = XEXP (x, 0);
21675 output_addr_const (stream, x);
21676 break;
21681 /* Target hook for printing a memory address. */
21682 static void
21683 arm_print_operand_address (FILE *stream, rtx x)
21685 if (TARGET_32BIT)
21687 int is_minus = GET_CODE (x) == MINUS;
21689 if (REG_P (x))
21690 asm_fprintf (stream, "[%r]", REGNO (x));
21691 else if (GET_CODE (x) == PLUS || is_minus)
21693 rtx base = XEXP (x, 0);
21694 rtx index = XEXP (x, 1);
21695 HOST_WIDE_INT offset = 0;
21696 if (!REG_P (base)
21697 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21699 /* Ensure that BASE is a register. */
21700 /* (one of them must be). */
21701 /* Also ensure that the SP is not used as an index register. */
21702 rtx temp = base;
21703 base = index;
21704 index = temp;
21706 switch (GET_CODE (index))
21708 case CONST_INT:
21709 offset = INTVAL (index);
21710 if (is_minus)
21711 offset = -offset;
21712 asm_fprintf (stream, "[%r, #%wd]",
21713 REGNO (base), offset);
21714 break;
21716 case REG:
21717 asm_fprintf (stream, "[%r, %s%r]",
21718 REGNO (base), is_minus ? "-" : "",
21719 REGNO (index));
21720 break;
21722 case MULT:
21723 case ASHIFTRT:
21724 case LSHIFTRT:
21725 case ASHIFT:
21726 case ROTATERT:
21728 asm_fprintf (stream, "[%r, %s%r",
21729 REGNO (base), is_minus ? "-" : "",
21730 REGNO (XEXP (index, 0)));
21731 arm_print_operand (stream, index, 'S');
21732 fputs ("]", stream);
21733 break;
21736 default:
21737 gcc_unreachable ();
21740 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
21741 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
21743 extern enum machine_mode output_memory_reference_mode;
21745 gcc_assert (REG_P (XEXP (x, 0)));
21747 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
21748 asm_fprintf (stream, "[%r, #%s%d]!",
21749 REGNO (XEXP (x, 0)),
21750 GET_CODE (x) == PRE_DEC ? "-" : "",
21751 GET_MODE_SIZE (output_memory_reference_mode));
21752 else
21753 asm_fprintf (stream, "[%r], #%s%d",
21754 REGNO (XEXP (x, 0)),
21755 GET_CODE (x) == POST_DEC ? "-" : "",
21756 GET_MODE_SIZE (output_memory_reference_mode));
21758 else if (GET_CODE (x) == PRE_MODIFY)
21760 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
21761 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21762 asm_fprintf (stream, "#%wd]!",
21763 INTVAL (XEXP (XEXP (x, 1), 1)));
21764 else
21765 asm_fprintf (stream, "%r]!",
21766 REGNO (XEXP (XEXP (x, 1), 1)));
21768 else if (GET_CODE (x) == POST_MODIFY)
21770 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
21771 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21772 asm_fprintf (stream, "#%wd",
21773 INTVAL (XEXP (XEXP (x, 1), 1)));
21774 else
21775 asm_fprintf (stream, "%r",
21776 REGNO (XEXP (XEXP (x, 1), 1)));
21778 else output_addr_const (stream, x);
21780 else
21782 if (REG_P (x))
21783 asm_fprintf (stream, "[%r]", REGNO (x));
21784 else if (GET_CODE (x) == POST_INC)
21785 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
21786 else if (GET_CODE (x) == PLUS)
21788 gcc_assert (REG_P (XEXP (x, 0)));
21789 if (CONST_INT_P (XEXP (x, 1)))
21790 asm_fprintf (stream, "[%r, #%wd]",
21791 REGNO (XEXP (x, 0)),
21792 INTVAL (XEXP (x, 1)));
21793 else
21794 asm_fprintf (stream, "[%r, %r]",
21795 REGNO (XEXP (x, 0)),
21796 REGNO (XEXP (x, 1)));
21798 else
21799 output_addr_const (stream, x);
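/* Illustrative examples of the 32-bit addresses printed above, assuming a
   4-byte access through r4 (the exact shift text comes from the 'S'
   operand code elsewhere in arm_print_operand):
     (reg r4)                    -> "[r4]"
     (plus r4 (const_int 8))     -> "[r4, #8]"
     (plus r4 r5)                -> "[r4, r5]"
     (plus r4 (mult r5 4))       -> "[r4, r5, lsl #2]"
     (pre_inc r4)                -> "[r4, #4]!"
     (post_inc r4)               -> "[r4], #4"  */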
21803 /* Target hook for indicating whether a punctuation character for
21804 TARGET_PRINT_OPERAND is valid. */
21805 static bool
21806 arm_print_operand_punct_valid_p (unsigned char code)
21808 return (code == '@' || code == '|' || code == '.'
21809 || code == '(' || code == ')' || code == '#'
21810 || (TARGET_32BIT && (code == '?'))
21811 || (TARGET_THUMB2 && (code == '!'))
21812 || (TARGET_THUMB && (code == '_')));
21815 /* Target hook for assembling integer objects. The ARM version needs to
21816 handle word-sized values specially. */
21817 static bool
21818 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
21820 enum machine_mode mode;
21822 if (size == UNITS_PER_WORD && aligned_p)
21824 fputs ("\t.word\t", asm_out_file);
21825 output_addr_const (asm_out_file, x);
21827 /* Mark symbols as position independent. We only do this in the
21828 .text segment, not in the .data segment. */
21829 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
21830 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
21832 /* See legitimize_pic_address for an explanation of the
21833 TARGET_VXWORKS_RTP check. */
21834 if (!arm_pic_data_is_text_relative
21835 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
21836 fputs ("(GOT)", asm_out_file);
21837 else
21838 fputs ("(GOTOFF)", asm_out_file);
21840 fputc ('\n', asm_out_file);
21841 return true;
21844 mode = GET_MODE (x);
21846 if (arm_vector_mode_supported_p (mode))
21848 int i, units;
21850 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21852 units = CONST_VECTOR_NUNITS (x);
21853 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
21855 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21856 for (i = 0; i < units; i++)
21858 rtx elt = CONST_VECTOR_ELT (x, i);
21859 assemble_integer
21860 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
21862 else
21863 for (i = 0; i < units; i++)
21865 rtx elt = CONST_VECTOR_ELT (x, i);
21866 REAL_VALUE_TYPE rval;
21868 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
21870 assemble_real
21871 (rval, GET_MODE_INNER (mode),
21872 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
21875 return true;
21878 return default_assemble_integer (x, size, aligned_p);
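/* Illustrative example of the output produced above: for a word-sized,
   aligned SYMBOL_REF emitted into a PIC constant pool this prints
   "\t.word\tsym(GOTOFF)" when the PIC data is text-relative and the symbol
   is local, and "\t.word\tsym(GOT)" otherwise; a CONST_VECTOR of a
   supported vector mode is emitted element by element instead.  */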
21881 static void
21882 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
21884 section *s;
21886 if (!TARGET_AAPCS_BASED)
21888 (is_ctor ?
21889 default_named_section_asm_out_constructor
21890 : default_named_section_asm_out_destructor) (symbol, priority);
21891 return;
21894 /* Put these in the .init_array section, using a special relocation. */
21895 if (priority != DEFAULT_INIT_PRIORITY)
21897 char buf[18];
21898 sprintf (buf, "%s.%.5u",
21899 is_ctor ? ".init_array" : ".fini_array",
21900 priority);
21901 s = get_section (buf, SECTION_WRITE, NULL_TREE);
21903 else if (is_ctor)
21904 s = ctors_section;
21905 else
21906 s = dtors_section;
21908 switch_to_section (s);
21909 assemble_align (POINTER_SIZE);
21910 fputs ("\t.word\t", asm_out_file);
21911 output_addr_const (asm_out_file, symbol);
21912 fputs ("(target1)\n", asm_out_file);
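/* Illustrative example: on an AAPCS-based target a constructor registered
   with priority 101 is placed in a section named ".init_array.00101" and
   emitted as "\t.word\tfn(target1)"; with the default priority it goes into
   the plain ctors_section instead.  */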
21915 /* Add a function to the list of static constructors. */
21917 static void
21918 arm_elf_asm_constructor (rtx symbol, int priority)
21920 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
21923 /* Add a function to the list of static destructors. */
21925 static void
21926 arm_elf_asm_destructor (rtx symbol, int priority)
21928 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
21931 /* A finite state machine takes care of noticing whether or not instructions
21932 can be conditionally executed, and thus decreases execution time and code
21933 size by deleting branch instructions. The fsm is controlled by
21934 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21936 /* The states of the fsm controlling condition codes are:
21937 0: normal, do nothing special
21938 1: make ASM_OUTPUT_OPCODE not output this instruction
21939 2: make ASM_OUTPUT_OPCODE not output this instruction
21940 3: make instructions conditional
21941 4: make instructions conditional
21943 State transitions (state->state by whom under condition):
21944 0 -> 1 final_prescan_insn if the `target' is a label
21945 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21946 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21947 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21948 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21949 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21950 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21951 (the target insn is arm_target_insn).
21953 If the jump clobbers the conditions then we use states 2 and 4.
21955 A similar thing can be done with conditional return insns.
21957 XXX In case the `target' is an unconditional branch, this conditionalising
21958 of the instructions always reduces code size, but not always execution
21959 time. But then, I want to reduce the code size to somewhere near what
21960 /bin/cc produces. */
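/* Illustrative example of the transformation this enables (ARM state):

       cmp     r0, #0
       beq     .L1             @ conditional branch over two insns
       add     r1, r1, #1
       mov     r2, #3
   .L1:

   becomes

       cmp     r0, #0
       addne   r1, r1, #1
       movne   r2, #3

   final_prescan_insn suppresses the branch (state 1) and the skipped
   instructions are output with the inverse of the branch condition.  */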
21962 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21963 instructions. When a COND_EXEC instruction is seen the subsequent
21964 instructions are scanned so that multiple conditional instructions can be
21965 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21966 specify the length and true/false mask for the IT block. These will be
21967 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
21969 /* Returns the index of the ARM condition code string in
21970 `arm_condition_codes', or ARM_NV if the comparison is invalid.
21971 COMPARISON should be an rtx like `(eq (...) (...))'. */
21973 enum arm_cond_code
21974 maybe_get_arm_condition_code (rtx comparison)
21976 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
21977 enum arm_cond_code code;
21978 enum rtx_code comp_code = GET_CODE (comparison);
21980 if (GET_MODE_CLASS (mode) != MODE_CC)
21981 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
21982 XEXP (comparison, 1));
21984 switch (mode)
21986 case CC_DNEmode: code = ARM_NE; goto dominance;
21987 case CC_DEQmode: code = ARM_EQ; goto dominance;
21988 case CC_DGEmode: code = ARM_GE; goto dominance;
21989 case CC_DGTmode: code = ARM_GT; goto dominance;
21990 case CC_DLEmode: code = ARM_LE; goto dominance;
21991 case CC_DLTmode: code = ARM_LT; goto dominance;
21992 case CC_DGEUmode: code = ARM_CS; goto dominance;
21993 case CC_DGTUmode: code = ARM_HI; goto dominance;
21994 case CC_DLEUmode: code = ARM_LS; goto dominance;
21995 case CC_DLTUmode: code = ARM_CC;
21997 dominance:
21998 if (comp_code == EQ)
21999 return ARM_INVERSE_CONDITION_CODE (code);
22000 if (comp_code == NE)
22001 return code;
22002 return ARM_NV;
22004 case CC_NOOVmode:
22005 switch (comp_code)
22007 case NE: return ARM_NE;
22008 case EQ: return ARM_EQ;
22009 case GE: return ARM_PL;
22010 case LT: return ARM_MI;
22011 default: return ARM_NV;
22014 case CC_Zmode:
22015 switch (comp_code)
22017 case NE: return ARM_NE;
22018 case EQ: return ARM_EQ;
22019 default: return ARM_NV;
22022 case CC_Nmode:
22023 switch (comp_code)
22025 case NE: return ARM_MI;
22026 case EQ: return ARM_PL;
22027 default: return ARM_NV;
22030 case CCFPEmode:
22031 case CCFPmode:
22032 /* We can handle all cases except UNEQ and LTGT. */
22033 switch (comp_code)
22035 case GE: return ARM_GE;
22036 case GT: return ARM_GT;
22037 case LE: return ARM_LS;
22038 case LT: return ARM_MI;
22039 case NE: return ARM_NE;
22040 case EQ: return ARM_EQ;
22041 case ORDERED: return ARM_VC;
22042 case UNORDERED: return ARM_VS;
22043 case UNLT: return ARM_LT;
22044 case UNLE: return ARM_LE;
22045 case UNGT: return ARM_HI;
22046 case UNGE: return ARM_PL;
22047 /* UNEQ and LTGT do not have a representation. */
22048 case UNEQ: /* Fall through. */
22049 case LTGT: /* Fall through. */
22050 default: return ARM_NV;
22053 case CC_SWPmode:
22054 switch (comp_code)
22056 case NE: return ARM_NE;
22057 case EQ: return ARM_EQ;
22058 case GE: return ARM_LE;
22059 case GT: return ARM_LT;
22060 case LE: return ARM_GE;
22061 case LT: return ARM_GT;
22062 case GEU: return ARM_LS;
22063 case GTU: return ARM_CC;
22064 case LEU: return ARM_CS;
22065 case LTU: return ARM_HI;
22066 default: return ARM_NV;
22069 case CC_Cmode:
22070 switch (comp_code)
22072 case LTU: return ARM_CS;
22073 case GEU: return ARM_CC;
22074 default: return ARM_NV;
22077 case CC_CZmode:
22078 switch (comp_code)
22080 case NE: return ARM_NE;
22081 case EQ: return ARM_EQ;
22082 case GEU: return ARM_CS;
22083 case GTU: return ARM_HI;
22084 case LEU: return ARM_LS;
22085 case LTU: return ARM_CC;
22086 default: return ARM_NV;
22089 case CC_NCVmode:
22090 switch (comp_code)
22092 case GE: return ARM_GE;
22093 case LT: return ARM_LT;
22094 case GEU: return ARM_CS;
22095 case LTU: return ARM_CC;
22096 default: return ARM_NV;
22099 case CCmode:
22100 switch (comp_code)
22102 case NE: return ARM_NE;
22103 case EQ: return ARM_EQ;
22104 case GE: return ARM_GE;
22105 case GT: return ARM_GT;
22106 case LE: return ARM_LE;
22107 case LT: return ARM_LT;
22108 case GEU: return ARM_CS;
22109 case GTU: return ARM_HI;
22110 case LEU: return ARM_LS;
22111 case LTU: return ARM_CC;
22112 default: return ARM_NV;
22115 default: gcc_unreachable ();
22119 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22120 static enum arm_cond_code
22121 get_arm_condition_code (rtx comparison)
22123 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22124 gcc_assert (code != ARM_NV);
22125 return code;
22128 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22129 instructions. */
22130 void
22131 thumb2_final_prescan_insn (rtx insn)
22133 rtx first_insn = insn;
22134 rtx body = PATTERN (insn);
22135 rtx predicate;
22136 enum arm_cond_code code;
22137 int n;
22138 int mask;
22139 int max;
22141 /* max_insns_skipped in the tune was already taken into account in the
22142 cost model of the ifcvt pass when generating COND_EXEC insns. At this
22143 stage we just emit the IT blocks as they are; it does not make sense
22144 to split the IT blocks. */
22145 max = MAX_INSN_PER_IT_BLOCK;
22147 /* Remove the previous insn from the count of insns to be output. */
22148 if (arm_condexec_count)
22149 arm_condexec_count--;
22151 /* Nothing to do if we are already inside a conditional block. */
22152 if (arm_condexec_count)
22153 return;
22155 if (GET_CODE (body) != COND_EXEC)
22156 return;
22158 /* Conditional jumps are implemented directly. */
22159 if (JUMP_P (insn))
22160 return;
22162 predicate = COND_EXEC_TEST (body);
22163 arm_current_cc = get_arm_condition_code (predicate);
22165 n = get_attr_ce_count (insn);
22166 arm_condexec_count = 1;
22167 arm_condexec_mask = (1 << n) - 1;
22168 arm_condexec_masklen = n;
22169 /* See if subsequent instructions can be combined into the same block. */
22170 for (;;)
22172 insn = next_nonnote_insn (insn);
22174 /* Jumping into the middle of an IT block is illegal, so a label or
22175 barrier terminates the block. */
22176 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22177 break;
22179 body = PATTERN (insn);
22180 /* USE and CLOBBER aren't really insns, so just skip them. */
22181 if (GET_CODE (body) == USE
22182 || GET_CODE (body) == CLOBBER)
22183 continue;
22185 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22186 if (GET_CODE (body) != COND_EXEC)
22187 break;
22188 /* Maximum number of conditionally executed instructions in a block. */
22189 n = get_attr_ce_count (insn);
22190 if (arm_condexec_masklen + n > max)
22191 break;
22193 predicate = COND_EXEC_TEST (body);
22194 code = get_arm_condition_code (predicate);
22195 mask = (1 << n) - 1;
22196 if (arm_current_cc == code)
22197 arm_condexec_mask |= (mask << arm_condexec_masklen);
22198 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22199 break;
22201 arm_condexec_count++;
22202 arm_condexec_masklen += n;
22204 /* A jump must be the last instruction in a conditional block. */
22205 if (JUMP_P (insn))
22206 break;
22208 /* Restore recog_data (getting the attributes of other insns can
22209 destroy this array, but final.c assumes that it remains intact
22210 across this call). */
22211 extract_constrain_insn_cached (first_insn);
22214 void
22215 arm_final_prescan_insn (rtx insn)
22217 /* BODY will hold the body of INSN. */
22218 rtx body = PATTERN (insn);
22220 /* This will be 1 if trying to repeat the trick, and things need to be
22221 reversed if it appears to fail. */
22222 int reverse = 0;
22224 /* If we start with a return insn, we only succeed if we find another one. */
22225 int seeking_return = 0;
22226 enum rtx_code return_code = UNKNOWN;
22228 /* START_INSN will hold the insn from where we start looking. This is the
22229 first insn after the following code_label if REVERSE is true. */
22230 rtx start_insn = insn;
22232 /* If in state 4, check if the target branch is reached, in order to
22233 change back to state 0. */
22234 if (arm_ccfsm_state == 4)
22236 if (insn == arm_target_insn)
22238 arm_target_insn = NULL;
22239 arm_ccfsm_state = 0;
22241 return;
22244 /* If in state 3, it is possible to repeat the trick, if this insn is an
22245 unconditional branch to a label, and immediately following this branch
22246 is the previous target label which is only used once, and the label this
22247 branch jumps to is not too far off. */
22248 if (arm_ccfsm_state == 3)
22250 if (simplejump_p (insn))
22252 start_insn = next_nonnote_insn (start_insn);
22253 if (BARRIER_P (start_insn))
22255 /* XXX Isn't this always a barrier? */
22256 start_insn = next_nonnote_insn (start_insn);
22258 if (LABEL_P (start_insn)
22259 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22260 && LABEL_NUSES (start_insn) == 1)
22261 reverse = TRUE;
22262 else
22263 return;
22265 else if (ANY_RETURN_P (body))
22267 start_insn = next_nonnote_insn (start_insn);
22268 if (BARRIER_P (start_insn))
22269 start_insn = next_nonnote_insn (start_insn);
22270 if (LABEL_P (start_insn)
22271 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22272 && LABEL_NUSES (start_insn) == 1)
22274 reverse = TRUE;
22275 seeking_return = 1;
22276 return_code = GET_CODE (body);
22278 else
22279 return;
22281 else
22282 return;
22285 gcc_assert (!arm_ccfsm_state || reverse);
22286 if (!JUMP_P (insn))
22287 return;
22289 /* This jump might be paralleled with a clobber of the condition codes;
22290 the jump should always come first. */
22291 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22292 body = XVECEXP (body, 0, 0);
22294 if (reverse
22295 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22296 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22298 int insns_skipped;
22299 int fail = FALSE, succeed = FALSE;
22300 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22301 int then_not_else = TRUE;
22302 rtx this_insn = start_insn, label = 0;
22304 /* Register the insn jumped to. */
22305 if (reverse)
22307 if (!seeking_return)
22308 label = XEXP (SET_SRC (body), 0);
22310 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22311 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22312 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22314 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22315 then_not_else = FALSE;
22317 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22319 seeking_return = 1;
22320 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22322 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22324 seeking_return = 1;
22325 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22326 then_not_else = FALSE;
22328 else
22329 gcc_unreachable ();
22331 /* See how many insns this branch skips, and what kind of insns. If all
22332 insns are okay, and the label or unconditional branch to the same
22333 label is not too far away, succeed. */
22334 for (insns_skipped = 0;
22335 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22337 rtx scanbody;
22339 this_insn = next_nonnote_insn (this_insn);
22340 if (!this_insn)
22341 break;
22343 switch (GET_CODE (this_insn))
22345 case CODE_LABEL:
22346 /* Succeed if it is the target label, otherwise fail since
22347 control falls in from somewhere else. */
22348 if (this_insn == label)
22350 arm_ccfsm_state = 1;
22351 succeed = TRUE;
22353 else
22354 fail = TRUE;
22355 break;
22357 case BARRIER:
22358 /* Succeed if the following insn is the target label.
22359 Otherwise fail.
22360 If return insns are used then the last insn in a function
22361 will be a barrier. */
22362 this_insn = next_nonnote_insn (this_insn);
22363 if (this_insn && this_insn == label)
22365 arm_ccfsm_state = 1;
22366 succeed = TRUE;
22368 else
22369 fail = TRUE;
22370 break;
22372 case CALL_INSN:
22373 /* The AAPCS says that conditional calls should not be
22374 used since they make interworking inefficient (the
22375 linker can't transform BL<cond> into BLX). That's
22376 only a problem if the machine has BLX. */
22377 if (arm_arch5)
22379 fail = TRUE;
22380 break;
22383 /* Succeed if the following insn is the target label, or
22384 if the following two insns are a barrier and the
22385 target label. */
22386 this_insn = next_nonnote_insn (this_insn);
22387 if (this_insn && BARRIER_P (this_insn))
22388 this_insn = next_nonnote_insn (this_insn);
22390 if (this_insn && this_insn == label
22391 && insns_skipped < max_insns_skipped)
22393 arm_ccfsm_state = 1;
22394 succeed = TRUE;
22396 else
22397 fail = TRUE;
22398 break;
22400 case JUMP_INSN:
22401 /* If this is an unconditional branch to the same label, succeed.
22402 If it is to another label, do nothing. If it is conditional,
22403 fail. */
22404 /* XXX Probably, the tests for SET and the PC are
22405 unnecessary. */
22407 scanbody = PATTERN (this_insn);
22408 if (GET_CODE (scanbody) == SET
22409 && GET_CODE (SET_DEST (scanbody)) == PC)
22411 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22412 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22414 arm_ccfsm_state = 2;
22415 succeed = TRUE;
22417 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22418 fail = TRUE;
22420 /* Fail if a conditional return is undesirable (e.g. on a
22421 StrongARM), but still allow this if optimizing for size. */
22422 else if (GET_CODE (scanbody) == return_code
22423 && !use_return_insn (TRUE, NULL)
22424 && !optimize_size)
22425 fail = TRUE;
22426 else if (GET_CODE (scanbody) == return_code)
22428 arm_ccfsm_state = 2;
22429 succeed = TRUE;
22431 else if (GET_CODE (scanbody) == PARALLEL)
22433 switch (get_attr_conds (this_insn))
22435 case CONDS_NOCOND:
22436 break;
22437 default:
22438 fail = TRUE;
22439 break;
22442 else
22443 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22445 break;
22447 case INSN:
22448 /* Instructions using or affecting the condition codes make it
22449 fail. */
22450 scanbody = PATTERN (this_insn);
22451 if (!(GET_CODE (scanbody) == SET
22452 || GET_CODE (scanbody) == PARALLEL)
22453 || get_attr_conds (this_insn) != CONDS_NOCOND)
22454 fail = TRUE;
22455 break;
22457 default:
22458 break;
22461 if (succeed)
22463 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22464 arm_target_label = CODE_LABEL_NUMBER (label);
22465 else
22467 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22469 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22471 this_insn = next_nonnote_insn (this_insn);
22472 gcc_assert (!this_insn
22473 || (!BARRIER_P (this_insn)
22474 && !LABEL_P (this_insn)));
22476 if (!this_insn)
22478 /* Oh, dear! We ran off the end; give up. */
22479 extract_constrain_insn_cached (insn);
22480 arm_ccfsm_state = 0;
22481 arm_target_insn = NULL;
22482 return;
22484 arm_target_insn = this_insn;
22487 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22488 what it was. */
22489 if (!reverse)
22490 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22492 if (reverse || then_not_else)
22493 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22496 /* Restore recog_data (getting the attributes of other insns can
22497 destroy this array, but final.c assumes that it remains intact
22498 across this call). */
22499 extract_constrain_insn_cached (insn);
22503 /* Output IT instructions. */
22504 void
22505 thumb2_asm_output_opcode (FILE * stream)
22507 char buff[5];
22508 int n;
22510 if (arm_condexec_mask)
22512 for (n = 0; n < arm_condexec_masklen; n++)
22513 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22514 buff[n] = 0;
22515 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22516 arm_condition_codes[arm_current_cc]);
22517 arm_condexec_mask = 0;
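/* Illustrative example: with arm_condexec_masklen == 3, arm_condexec_mask
   == 0x5 (binary 101) and arm_current_cc == ARM_EQ, BUFF becomes "tet" and
   "itet eq" is emitted before the first insn of the block; the three
   covered insns then execute as eq, ne and eq respectively.  */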
22521 /* Returns true if REGNO is a valid register
22522 for holding a quantity of type MODE. */
22524 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22526 if (GET_MODE_CLASS (mode) == MODE_CC)
22527 return (regno == CC_REGNUM
22528 || (TARGET_HARD_FLOAT && TARGET_VFP
22529 && regno == VFPCC_REGNUM));
22531 if (TARGET_THUMB1)
22532 /* For the Thumb we only allow values bigger than SImode in
22533 registers 0 - 6, so that there is always a second low
22534 register available to hold the upper part of the value.
22535 We probably ought to ensure that the register is the
22536 start of an even numbered register pair. */
22537 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22539 if (TARGET_HARD_FLOAT && TARGET_VFP
22540 && IS_VFP_REGNUM (regno))
22542 if (mode == SFmode || mode == SImode)
22543 return VFP_REGNO_OK_FOR_SINGLE (regno);
22545 if (mode == DFmode)
22546 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22548 /* VFP registers can hold HFmode values, but there is no point in
22549 putting them there unless we have hardware conversion insns. */
22550 if (mode == HFmode)
22551 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22553 if (TARGET_NEON)
22554 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22555 || (VALID_NEON_QREG_MODE (mode)
22556 && NEON_REGNO_OK_FOR_QUAD (regno))
22557 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22558 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22559 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22560 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22561 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22563 return FALSE;
22566 if (TARGET_REALLY_IWMMXT)
22568 if (IS_IWMMXT_GR_REGNUM (regno))
22569 return mode == SImode;
22571 if (IS_IWMMXT_REGNUM (regno))
22572 return VALID_IWMMXT_REG_MODE (mode);
22575 /* We allow almost any value to be stored in the general registers.
22576 Restrict doubleword quantities to even register pairs so that we can
22577 use ldrd. Do not allow very large Neon structure opaque modes in
22578 general registers; they would use too many. */
22579 if (regno <= LAST_ARM_REGNUM)
22580 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
22581 && ARM_NUM_REGS (mode) <= 4;
22583 if (regno == FRAME_POINTER_REGNUM
22584 || regno == ARG_POINTER_REGNUM)
22585 /* We only allow integers in the fake hard registers. */
22586 return GET_MODE_CLASS (mode) == MODE_INT;
22588 return FALSE;
22591 /* Implement MODES_TIEABLE_P. */
22593 bool
22594 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22596 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22597 return true;
22599 /* We specifically want to allow elements of "structure" modes to
22600 be tieable to the structure. This more general condition allows
22601 other rarer situations too. */
22602 if (TARGET_NEON
22603 && (VALID_NEON_DREG_MODE (mode1)
22604 || VALID_NEON_QREG_MODE (mode1)
22605 || VALID_NEON_STRUCT_MODE (mode1))
22606 && (VALID_NEON_DREG_MODE (mode2)
22607 || VALID_NEON_QREG_MODE (mode2)
22608 || VALID_NEON_STRUCT_MODE (mode2)))
22609 return true;
22611 return false;
22614 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22615 not used in arm mode. */
22617 enum reg_class
22618 arm_regno_class (int regno)
22620 if (TARGET_THUMB1)
22622 if (regno == STACK_POINTER_REGNUM)
22623 return STACK_REG;
22624 if (regno == CC_REGNUM)
22625 return CC_REG;
22626 if (regno < 8)
22627 return LO_REGS;
22628 return HI_REGS;
22631 if (TARGET_THUMB2 && regno < 8)
22632 return LO_REGS;
22634 if ( regno <= LAST_ARM_REGNUM
22635 || regno == FRAME_POINTER_REGNUM
22636 || regno == ARG_POINTER_REGNUM)
22637 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22639 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22640 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22642 if (IS_VFP_REGNUM (regno))
22644 if (regno <= D7_VFP_REGNUM)
22645 return VFP_D0_D7_REGS;
22646 else if (regno <= LAST_LO_VFP_REGNUM)
22647 return VFP_LO_REGS;
22648 else
22649 return VFP_HI_REGS;
22652 if (IS_IWMMXT_REGNUM (regno))
22653 return IWMMXT_REGS;
22655 if (IS_IWMMXT_GR_REGNUM (regno))
22656 return IWMMXT_GR_REGS;
22658 return NO_REGS;
22661 /* Handle a special case when computing the offset
22662 of an argument from the frame pointer. */
22664 arm_debugger_arg_offset (int value, rtx addr)
22666 rtx insn;
22668 /* We are only interested if dbxout_parms() failed to compute the offset. */
22669 if (value != 0)
22670 return 0;
22672 /* We can only cope with the case where the address is held in a register. */
22673 if (!REG_P (addr))
22674 return 0;
22676 /* If we are using the frame pointer to point at the argument, then
22677 an offset of 0 is correct. */
22678 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22679 return 0;
22681 /* If we are using the stack pointer to point at the
22682 argument, then an offset of 0 is correct. */
22683 /* ??? Check this is consistent with thumb2 frame layout. */
22684 if ((TARGET_THUMB || !frame_pointer_needed)
22685 && REGNO (addr) == SP_REGNUM)
22686 return 0;
22688 /* Oh dear. The argument is pointed to by a register rather
22689 than being held in a register, or being stored at a known
22690 offset from the frame pointer. Since GDB only understands
22691 those two kinds of argument we must translate the address
22692 held in the register into an offset from the frame pointer.
22693 We do this by searching through the insns for the function
22694 looking to see where this register gets its value. If the
22695 register is initialized from the frame pointer plus an offset
22696 then we are in luck and we can continue, otherwise we give up.
22698 This code is exercised by producing debugging information
22699 for a function with arguments like this:
22701 double func (double a, double b, int c, double d) {return d;}
22703 Without this code the stab for parameter 'd' will be set to
22704 an offset of 0 from the frame pointer, rather than 8. */
22706 /* The if() statement says:
22708 If the insn is a normal instruction
22709 and if the insn is setting the value in a register
22710 and if the register being set is the register holding the address of the argument
22711 and if the address is computed by an addition
22712 that involves adding to a register
22713 which is the frame pointer
22714 a constant integer
22716 then... */
22718 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22720 if ( NONJUMP_INSN_P (insn)
22721 && GET_CODE (PATTERN (insn)) == SET
22722 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
22723 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
22724 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
22725 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22726 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
22729 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
22731 break;
22735 if (value == 0)
22737 debug_rtx (addr);
22738 warning (0, "unable to compute real location of stacked parameter");
22739 value = 8; /* XXX magic hack */
22742 return value;
22745 typedef enum {
22746 T_V8QI,
22747 T_V4HI,
22748 T_V4HF,
22749 T_V2SI,
22750 T_V2SF,
22751 T_DI,
22752 T_V16QI,
22753 T_V8HI,
22754 T_V4SI,
22755 T_V4SF,
22756 T_V2DI,
22757 T_TI,
22758 T_EI,
22759 T_OI,
22760 T_MAX /* Size of enum. Keep last. */
22761 } neon_builtin_type_mode;
22763 #define TYPE_MODE_BIT(X) (1 << (X))
22765 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
22766 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
22767 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
22768 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
22769 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
22770 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
22772 #define v8qi_UP T_V8QI
22773 #define v4hi_UP T_V4HI
22774 #define v4hf_UP T_V4HF
22775 #define v2si_UP T_V2SI
22776 #define v2sf_UP T_V2SF
22777 #define di_UP T_DI
22778 #define v16qi_UP T_V16QI
22779 #define v8hi_UP T_V8HI
22780 #define v4si_UP T_V4SI
22781 #define v4sf_UP T_V4SF
22782 #define v2di_UP T_V2DI
22783 #define ti_UP T_TI
22784 #define ei_UP T_EI
22785 #define oi_UP T_OI
22787 #define UP(X) X##_UP
22789 typedef enum {
22790 NEON_BINOP,
22791 NEON_TERNOP,
22792 NEON_UNOP,
22793 NEON_GETLANE,
22794 NEON_SETLANE,
22795 NEON_CREATE,
22796 NEON_RINT,
22797 NEON_DUP,
22798 NEON_DUPLANE,
22799 NEON_COMBINE,
22800 NEON_SPLIT,
22801 NEON_LANEMUL,
22802 NEON_LANEMULL,
22803 NEON_LANEMULH,
22804 NEON_LANEMAC,
22805 NEON_SCALARMUL,
22806 NEON_SCALARMULL,
22807 NEON_SCALARMULH,
22808 NEON_SCALARMAC,
22809 NEON_CONVERT,
22810 NEON_FLOAT_WIDEN,
22811 NEON_FLOAT_NARROW,
22812 NEON_FIXCONV,
22813 NEON_SELECT,
22814 NEON_RESULTPAIR,
22815 NEON_REINTERP,
22816 NEON_VTBL,
22817 NEON_VTBX,
22818 NEON_LOAD1,
22819 NEON_LOAD1LANE,
22820 NEON_STORE1,
22821 NEON_STORE1LANE,
22822 NEON_LOADSTRUCT,
22823 NEON_LOADSTRUCTLANE,
22824 NEON_STORESTRUCT,
22825 NEON_STORESTRUCTLANE,
22826 NEON_LOGICBINOP,
22827 NEON_SHIFTINSERT,
22828 NEON_SHIFTIMM,
22829 NEON_SHIFTACC
22830 } neon_itype;
22832 typedef struct {
22833 const char *name;
22834 const neon_itype itype;
22835 const neon_builtin_type_mode mode;
22836 const enum insn_code code;
22837 unsigned int fcode;
22838 } neon_builtin_datum;
22840 #define CF(N,X) CODE_FOR_neon_##N##X
22842 #define VAR1(T, N, A) \
22843 {#N, NEON_##T, UP (A), CF (N, A), 0}
22844 #define VAR2(T, N, A, B) \
22845 VAR1 (T, N, A), \
22846 {#N, NEON_##T, UP (B), CF (N, B), 0}
22847 #define VAR3(T, N, A, B, C) \
22848 VAR2 (T, N, A, B), \
22849 {#N, NEON_##T, UP (C), CF (N, C), 0}
22850 #define VAR4(T, N, A, B, C, D) \
22851 VAR3 (T, N, A, B, C), \
22852 {#N, NEON_##T, UP (D), CF (N, D), 0}
22853 #define VAR5(T, N, A, B, C, D, E) \
22854 VAR4 (T, N, A, B, C, D), \
22855 {#N, NEON_##T, UP (E), CF (N, E), 0}
22856 #define VAR6(T, N, A, B, C, D, E, F) \
22857 VAR5 (T, N, A, B, C, D, E), \
22858 {#N, NEON_##T, UP (F), CF (N, F), 0}
22859 #define VAR7(T, N, A, B, C, D, E, F, G) \
22860 VAR6 (T, N, A, B, C, D, E, F), \
22861 {#N, NEON_##T, UP (G), CF (N, G), 0}
22862 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22863 VAR7 (T, N, A, B, C, D, E, F, G), \
22864 {#N, NEON_##T, UP (H), CF (N, H), 0}
22865 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22866 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22867 {#N, NEON_##T, UP (I), CF (N, I), 0}
22868 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22869 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22870 {#N, NEON_##T, UP (J), CF (N, J), 0}
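/* For illustration (hypothetical entry; the real ones live in
   arm_neon_builtins.def): VAR2 (BINOP, vadd, v8qi, v16qi) would expand, via
   the UP and CF macros above, to the two initialisers

     {"vadd", NEON_BINOP, T_V8QI,  CODE_FOR_neon_vaddv8qi,  0},
     {"vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0}  */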
22872 /* The NEON builtin data can be found in arm_neon_builtins.def.
22873 The mode entries in the following table correspond to the "key" type of the
22874 instruction variant, i.e. equivalent to that which would be specified after
22875 the assembler mnemonic, which usually refers to the last vector operand.
22876 (Signed/unsigned/polynomial types are not differentiated between though, and
22877 are all mapped onto the same mode for a given element size.) The modes
22878 listed per instruction should be the same as those defined for that
22879 instruction's pattern in neon.md. */
22881 static neon_builtin_datum neon_builtin_data[] =
22883 #include "arm_neon_builtins.def"
22886 #undef CF
22887 #undef VAR1
22888 #undef VAR2
22889 #undef VAR3
22890 #undef VAR4
22891 #undef VAR5
22892 #undef VAR6
22893 #undef VAR7
22894 #undef VAR8
22895 #undef VAR9
22896 #undef VAR10
22898 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
22899 #define VAR1(T, N, A) \
22900 CF (N, A)
22901 #define VAR2(T, N, A, B) \
22902 VAR1 (T, N, A), \
22903 CF (N, B)
22904 #define VAR3(T, N, A, B, C) \
22905 VAR2 (T, N, A, B), \
22906 CF (N, C)
22907 #define VAR4(T, N, A, B, C, D) \
22908 VAR3 (T, N, A, B, C), \
22909 CF (N, D)
22910 #define VAR5(T, N, A, B, C, D, E) \
22911 VAR4 (T, N, A, B, C, D), \
22912 CF (N, E)
22913 #define VAR6(T, N, A, B, C, D, E, F) \
22914 VAR5 (T, N, A, B, C, D, E), \
22915 CF (N, F)
22916 #define VAR7(T, N, A, B, C, D, E, F, G) \
22917 VAR6 (T, N, A, B, C, D, E, F), \
22918 CF (N, G)
22919 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22920 VAR7 (T, N, A, B, C, D, E, F, G), \
22921 CF (N, H)
22922 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22923 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22924 CF (N, I)
22925 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22926 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22927 CF (N, J)
22928 enum arm_builtins
22930 ARM_BUILTIN_GETWCGR0,
22931 ARM_BUILTIN_GETWCGR1,
22932 ARM_BUILTIN_GETWCGR2,
22933 ARM_BUILTIN_GETWCGR3,
22935 ARM_BUILTIN_SETWCGR0,
22936 ARM_BUILTIN_SETWCGR1,
22937 ARM_BUILTIN_SETWCGR2,
22938 ARM_BUILTIN_SETWCGR3,
22940 ARM_BUILTIN_WZERO,
22942 ARM_BUILTIN_WAVG2BR,
22943 ARM_BUILTIN_WAVG2HR,
22944 ARM_BUILTIN_WAVG2B,
22945 ARM_BUILTIN_WAVG2H,
22947 ARM_BUILTIN_WACCB,
22948 ARM_BUILTIN_WACCH,
22949 ARM_BUILTIN_WACCW,
22951 ARM_BUILTIN_WMACS,
22952 ARM_BUILTIN_WMACSZ,
22953 ARM_BUILTIN_WMACU,
22954 ARM_BUILTIN_WMACUZ,
22956 ARM_BUILTIN_WSADB,
22957 ARM_BUILTIN_WSADBZ,
22958 ARM_BUILTIN_WSADH,
22959 ARM_BUILTIN_WSADHZ,
22961 ARM_BUILTIN_WALIGNI,
22962 ARM_BUILTIN_WALIGNR0,
22963 ARM_BUILTIN_WALIGNR1,
22964 ARM_BUILTIN_WALIGNR2,
22965 ARM_BUILTIN_WALIGNR3,
22967 ARM_BUILTIN_TMIA,
22968 ARM_BUILTIN_TMIAPH,
22969 ARM_BUILTIN_TMIABB,
22970 ARM_BUILTIN_TMIABT,
22971 ARM_BUILTIN_TMIATB,
22972 ARM_BUILTIN_TMIATT,
22974 ARM_BUILTIN_TMOVMSKB,
22975 ARM_BUILTIN_TMOVMSKH,
22976 ARM_BUILTIN_TMOVMSKW,
22978 ARM_BUILTIN_TBCSTB,
22979 ARM_BUILTIN_TBCSTH,
22980 ARM_BUILTIN_TBCSTW,
22982 ARM_BUILTIN_WMADDS,
22983 ARM_BUILTIN_WMADDU,
22985 ARM_BUILTIN_WPACKHSS,
22986 ARM_BUILTIN_WPACKWSS,
22987 ARM_BUILTIN_WPACKDSS,
22988 ARM_BUILTIN_WPACKHUS,
22989 ARM_BUILTIN_WPACKWUS,
22990 ARM_BUILTIN_WPACKDUS,
22992 ARM_BUILTIN_WADDB,
22993 ARM_BUILTIN_WADDH,
22994 ARM_BUILTIN_WADDW,
22995 ARM_BUILTIN_WADDSSB,
22996 ARM_BUILTIN_WADDSSH,
22997 ARM_BUILTIN_WADDSSW,
22998 ARM_BUILTIN_WADDUSB,
22999 ARM_BUILTIN_WADDUSH,
23000 ARM_BUILTIN_WADDUSW,
23001 ARM_BUILTIN_WSUBB,
23002 ARM_BUILTIN_WSUBH,
23003 ARM_BUILTIN_WSUBW,
23004 ARM_BUILTIN_WSUBSSB,
23005 ARM_BUILTIN_WSUBSSH,
23006 ARM_BUILTIN_WSUBSSW,
23007 ARM_BUILTIN_WSUBUSB,
23008 ARM_BUILTIN_WSUBUSH,
23009 ARM_BUILTIN_WSUBUSW,
23011 ARM_BUILTIN_WAND,
23012 ARM_BUILTIN_WANDN,
23013 ARM_BUILTIN_WOR,
23014 ARM_BUILTIN_WXOR,
23016 ARM_BUILTIN_WCMPEQB,
23017 ARM_BUILTIN_WCMPEQH,
23018 ARM_BUILTIN_WCMPEQW,
23019 ARM_BUILTIN_WCMPGTUB,
23020 ARM_BUILTIN_WCMPGTUH,
23021 ARM_BUILTIN_WCMPGTUW,
23022 ARM_BUILTIN_WCMPGTSB,
23023 ARM_BUILTIN_WCMPGTSH,
23024 ARM_BUILTIN_WCMPGTSW,
23026 ARM_BUILTIN_TEXTRMSB,
23027 ARM_BUILTIN_TEXTRMSH,
23028 ARM_BUILTIN_TEXTRMSW,
23029 ARM_BUILTIN_TEXTRMUB,
23030 ARM_BUILTIN_TEXTRMUH,
23031 ARM_BUILTIN_TEXTRMUW,
23032 ARM_BUILTIN_TINSRB,
23033 ARM_BUILTIN_TINSRH,
23034 ARM_BUILTIN_TINSRW,
23036 ARM_BUILTIN_WMAXSW,
23037 ARM_BUILTIN_WMAXSH,
23038 ARM_BUILTIN_WMAXSB,
23039 ARM_BUILTIN_WMAXUW,
23040 ARM_BUILTIN_WMAXUH,
23041 ARM_BUILTIN_WMAXUB,
23042 ARM_BUILTIN_WMINSW,
23043 ARM_BUILTIN_WMINSH,
23044 ARM_BUILTIN_WMINSB,
23045 ARM_BUILTIN_WMINUW,
23046 ARM_BUILTIN_WMINUH,
23047 ARM_BUILTIN_WMINUB,
23049 ARM_BUILTIN_WMULUM,
23050 ARM_BUILTIN_WMULSM,
23051 ARM_BUILTIN_WMULUL,
23053 ARM_BUILTIN_PSADBH,
23054 ARM_BUILTIN_WSHUFH,
23056 ARM_BUILTIN_WSLLH,
23057 ARM_BUILTIN_WSLLW,
23058 ARM_BUILTIN_WSLLD,
23059 ARM_BUILTIN_WSRAH,
23060 ARM_BUILTIN_WSRAW,
23061 ARM_BUILTIN_WSRAD,
23062 ARM_BUILTIN_WSRLH,
23063 ARM_BUILTIN_WSRLW,
23064 ARM_BUILTIN_WSRLD,
23065 ARM_BUILTIN_WRORH,
23066 ARM_BUILTIN_WRORW,
23067 ARM_BUILTIN_WRORD,
23068 ARM_BUILTIN_WSLLHI,
23069 ARM_BUILTIN_WSLLWI,
23070 ARM_BUILTIN_WSLLDI,
23071 ARM_BUILTIN_WSRAHI,
23072 ARM_BUILTIN_WSRAWI,
23073 ARM_BUILTIN_WSRADI,
23074 ARM_BUILTIN_WSRLHI,
23075 ARM_BUILTIN_WSRLWI,
23076 ARM_BUILTIN_WSRLDI,
23077 ARM_BUILTIN_WRORHI,
23078 ARM_BUILTIN_WRORWI,
23079 ARM_BUILTIN_WRORDI,
23081 ARM_BUILTIN_WUNPCKIHB,
23082 ARM_BUILTIN_WUNPCKIHH,
23083 ARM_BUILTIN_WUNPCKIHW,
23084 ARM_BUILTIN_WUNPCKILB,
23085 ARM_BUILTIN_WUNPCKILH,
23086 ARM_BUILTIN_WUNPCKILW,
23088 ARM_BUILTIN_WUNPCKEHSB,
23089 ARM_BUILTIN_WUNPCKEHSH,
23090 ARM_BUILTIN_WUNPCKEHSW,
23091 ARM_BUILTIN_WUNPCKEHUB,
23092 ARM_BUILTIN_WUNPCKEHUH,
23093 ARM_BUILTIN_WUNPCKEHUW,
23094 ARM_BUILTIN_WUNPCKELSB,
23095 ARM_BUILTIN_WUNPCKELSH,
23096 ARM_BUILTIN_WUNPCKELSW,
23097 ARM_BUILTIN_WUNPCKELUB,
23098 ARM_BUILTIN_WUNPCKELUH,
23099 ARM_BUILTIN_WUNPCKELUW,
23101 ARM_BUILTIN_WABSB,
23102 ARM_BUILTIN_WABSH,
23103 ARM_BUILTIN_WABSW,
23105 ARM_BUILTIN_WADDSUBHX,
23106 ARM_BUILTIN_WSUBADDHX,
23108 ARM_BUILTIN_WABSDIFFB,
23109 ARM_BUILTIN_WABSDIFFH,
23110 ARM_BUILTIN_WABSDIFFW,
23112 ARM_BUILTIN_WADDCH,
23113 ARM_BUILTIN_WADDCW,
23115 ARM_BUILTIN_WAVG4,
23116 ARM_BUILTIN_WAVG4R,
23118 ARM_BUILTIN_WMADDSX,
23119 ARM_BUILTIN_WMADDUX,
23121 ARM_BUILTIN_WMADDSN,
23122 ARM_BUILTIN_WMADDUN,
23124 ARM_BUILTIN_WMULWSM,
23125 ARM_BUILTIN_WMULWUM,
23127 ARM_BUILTIN_WMULWSMR,
23128 ARM_BUILTIN_WMULWUMR,
23130 ARM_BUILTIN_WMULWL,
23132 ARM_BUILTIN_WMULSMR,
23133 ARM_BUILTIN_WMULUMR,
23135 ARM_BUILTIN_WQMULM,
23136 ARM_BUILTIN_WQMULMR,
23138 ARM_BUILTIN_WQMULWM,
23139 ARM_BUILTIN_WQMULWMR,
23141 ARM_BUILTIN_WADDBHUSM,
23142 ARM_BUILTIN_WADDBHUSL,
23144 ARM_BUILTIN_WQMIABB,
23145 ARM_BUILTIN_WQMIABT,
23146 ARM_BUILTIN_WQMIATB,
23147 ARM_BUILTIN_WQMIATT,
23149 ARM_BUILTIN_WQMIABBN,
23150 ARM_BUILTIN_WQMIABTN,
23151 ARM_BUILTIN_WQMIATBN,
23152 ARM_BUILTIN_WQMIATTN,
23154 ARM_BUILTIN_WMIABB,
23155 ARM_BUILTIN_WMIABT,
23156 ARM_BUILTIN_WMIATB,
23157 ARM_BUILTIN_WMIATT,
23159 ARM_BUILTIN_WMIABBN,
23160 ARM_BUILTIN_WMIABTN,
23161 ARM_BUILTIN_WMIATBN,
23162 ARM_BUILTIN_WMIATTN,
23164 ARM_BUILTIN_WMIAWBB,
23165 ARM_BUILTIN_WMIAWBT,
23166 ARM_BUILTIN_WMIAWTB,
23167 ARM_BUILTIN_WMIAWTT,
23169 ARM_BUILTIN_WMIAWBBN,
23170 ARM_BUILTIN_WMIAWBTN,
23171 ARM_BUILTIN_WMIAWTBN,
23172 ARM_BUILTIN_WMIAWTTN,
23174 ARM_BUILTIN_WMERGE,
23176 ARM_BUILTIN_CRC32B,
23177 ARM_BUILTIN_CRC32H,
23178 ARM_BUILTIN_CRC32W,
23179 ARM_BUILTIN_CRC32CB,
23180 ARM_BUILTIN_CRC32CH,
23181 ARM_BUILTIN_CRC32CW,
23183 #undef CRYPTO1
23184 #undef CRYPTO2
23185 #undef CRYPTO3
23187 #define CRYPTO1(L, U, M1, M2) \
23188 ARM_BUILTIN_CRYPTO_##U,
23189 #define CRYPTO2(L, U, M1, M2, M3) \
23190 ARM_BUILTIN_CRYPTO_##U,
23191 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23192 ARM_BUILTIN_CRYPTO_##U,
23194 #include "crypto.def"
23196 #undef CRYPTO1
23197 #undef CRYPTO2
23198 #undef CRYPTO3
23200 #include "arm_neon_builtins.def"
23202 ,ARM_BUILTIN_MAX
23205 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23207 #undef CF
23208 #undef VAR1
23209 #undef VAR2
23210 #undef VAR3
23211 #undef VAR4
23212 #undef VAR5
23213 #undef VAR6
23214 #undef VAR7
23215 #undef VAR8
23216 #undef VAR9
23217 #undef VAR10
23219 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23221 #define NUM_DREG_TYPES 5
23222 #define NUM_QREG_TYPES 6
23224 static void
23225 arm_init_neon_builtins (void)
23227 unsigned int i, fcode;
23228 tree decl;
23230 tree neon_intQI_type_node;
23231 tree neon_intHI_type_node;
23232 tree neon_floatHF_type_node;
23233 tree neon_polyQI_type_node;
23234 tree neon_polyHI_type_node;
23235 tree neon_intSI_type_node;
23236 tree neon_intDI_type_node;
23237 tree neon_intUTI_type_node;
23238 tree neon_float_type_node;
23240 tree intQI_pointer_node;
23241 tree intHI_pointer_node;
23242 tree intSI_pointer_node;
23243 tree intDI_pointer_node;
23244 tree float_pointer_node;
23246 tree const_intQI_node;
23247 tree const_intHI_node;
23248 tree const_intSI_node;
23249 tree const_intDI_node;
23250 tree const_float_node;
23252 tree const_intQI_pointer_node;
23253 tree const_intHI_pointer_node;
23254 tree const_intSI_pointer_node;
23255 tree const_intDI_pointer_node;
23256 tree const_float_pointer_node;
23258 tree V8QI_type_node;
23259 tree V4HI_type_node;
23260 tree V4HF_type_node;
23261 tree V2SI_type_node;
23262 tree V2SF_type_node;
23263 tree V16QI_type_node;
23264 tree V8HI_type_node;
23265 tree V4SI_type_node;
23266 tree V4SF_type_node;
23267 tree V2DI_type_node;
23269 tree intUQI_type_node;
23270 tree intUHI_type_node;
23271 tree intUSI_type_node;
23272 tree intUDI_type_node;
23274 tree intEI_type_node;
23275 tree intOI_type_node;
23276 tree intCI_type_node;
23277 tree intXI_type_node;
23279 tree V8QI_pointer_node;
23280 tree V4HI_pointer_node;
23281 tree V2SI_pointer_node;
23282 tree V2SF_pointer_node;
23283 tree V16QI_pointer_node;
23284 tree V8HI_pointer_node;
23285 tree V4SI_pointer_node;
23286 tree V4SF_pointer_node;
23287 tree V2DI_pointer_node;
23289 tree void_ftype_pv8qi_v8qi_v8qi;
23290 tree void_ftype_pv4hi_v4hi_v4hi;
23291 tree void_ftype_pv2si_v2si_v2si;
23292 tree void_ftype_pv2sf_v2sf_v2sf;
23293 tree void_ftype_pdi_di_di;
23294 tree void_ftype_pv16qi_v16qi_v16qi;
23295 tree void_ftype_pv8hi_v8hi_v8hi;
23296 tree void_ftype_pv4si_v4si_v4si;
23297 tree void_ftype_pv4sf_v4sf_v4sf;
23298 tree void_ftype_pv2di_v2di_v2di;
23300 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23301 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23302 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23304 /* Create distinguished type nodes for NEON vector element types,
23305 and pointers to values of such types, so we can detect them later. */
23306 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23307 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23308 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23309 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23310 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23311 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23312 neon_float_type_node = make_node (REAL_TYPE);
23313 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23314 layout_type (neon_float_type_node);
23315 neon_floatHF_type_node = make_node (REAL_TYPE);
23316 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23317 layout_type (neon_floatHF_type_node);
23319 /* Define typedefs which exactly correspond to the modes we are basing vector
23320 types on. If you change these names you'll need to change
23321 the table used by arm_mangle_type too. */
23322 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23323 "__builtin_neon_qi");
23324 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23325 "__builtin_neon_hi");
23326 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23327 "__builtin_neon_hf");
23328 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23329 "__builtin_neon_si");
23330 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23331 "__builtin_neon_sf");
23332 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23333 "__builtin_neon_di");
23334 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23335 "__builtin_neon_poly8");
23336 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23337 "__builtin_neon_poly16");
23339 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23340 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23341 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23342 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23343 float_pointer_node = build_pointer_type (neon_float_type_node);
23345 /* Next create constant-qualified versions of the above types. */
23346 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23347 TYPE_QUAL_CONST);
23348 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23349 TYPE_QUAL_CONST);
23350 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23351 TYPE_QUAL_CONST);
23352 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23353 TYPE_QUAL_CONST);
23354 const_float_node = build_qualified_type (neon_float_type_node,
23355 TYPE_QUAL_CONST);
23357 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23358 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23359 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23360 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23361 const_float_pointer_node = build_pointer_type (const_float_node);
23363 /* Now create vector types based on our NEON element types. */
23364 /* 64-bit vectors. */
23365 V8QI_type_node =
23366 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23367 V4HI_type_node =
23368 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23369 V4HF_type_node =
23370 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23371 V2SI_type_node =
23372 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23373 V2SF_type_node =
23374 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23375 /* 128-bit vectors. */
23376 V16QI_type_node =
23377 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23378 V8HI_type_node =
23379 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23380 V4SI_type_node =
23381 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23382 V4SF_type_node =
23383 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23384 V2DI_type_node =
23385 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23387 /* Unsigned integer types for various mode sizes. */
23388 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23389 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23390 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23391 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23392 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23395 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23396 "__builtin_neon_uqi");
23397 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23398 "__builtin_neon_uhi");
23399 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23400 "__builtin_neon_usi");
23401 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23402 "__builtin_neon_udi");
23403 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23404 "__builtin_neon_poly64");
23405 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23406 "__builtin_neon_poly128");
23408 /* Opaque integer types for structures of vectors. */
23409 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23410 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23411 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23412 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23414 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23415 "__builtin_neon_ti");
23416 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23417 "__builtin_neon_ei");
23418 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23419 "__builtin_neon_oi");
23420 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23421 "__builtin_neon_ci");
23422 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23423 "__builtin_neon_xi");
23425 /* Pointers to vector types. */
23426 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
23427 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
23428 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
23429 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
23430 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
23431 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
23432 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
23433 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
23434 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
23436 /* Operations which return results as pairs. */
23437 void_ftype_pv8qi_v8qi_v8qi =
23438 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
23439 V8QI_type_node, NULL);
23440 void_ftype_pv4hi_v4hi_v4hi =
23441 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
23442 V4HI_type_node, NULL);
23443 void_ftype_pv2si_v2si_v2si =
23444 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
23445 V2SI_type_node, NULL);
23446 void_ftype_pv2sf_v2sf_v2sf =
23447 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
23448 V2SF_type_node, NULL);
23449 void_ftype_pdi_di_di =
23450 build_function_type_list (void_type_node, intDI_pointer_node,
23451 neon_intDI_type_node, neon_intDI_type_node, NULL);
23452 void_ftype_pv16qi_v16qi_v16qi =
23453 build_function_type_list (void_type_node, V16QI_pointer_node,
23454 V16QI_type_node, V16QI_type_node, NULL);
23455 void_ftype_pv8hi_v8hi_v8hi =
23456 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
23457 V8HI_type_node, NULL);
23458 void_ftype_pv4si_v4si_v4si =
23459 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
23460 V4SI_type_node, NULL);
23461 void_ftype_pv4sf_v4sf_v4sf =
23462 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
23463 V4SF_type_node, NULL);
23464 void_ftype_pv2di_v2di_v2di =
23465 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
23466 V2DI_type_node, NULL);
23468 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23470 tree V4USI_type_node =
23471 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23473 tree V16UQI_type_node =
23474 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23476 tree v16uqi_ftype_v16uqi
23477 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23479 tree v16uqi_ftype_v16uqi_v16uqi
23480 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23481 V16UQI_type_node, NULL_TREE);
23483 tree v4usi_ftype_v4usi
23484 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23486 tree v4usi_ftype_v4usi_v4usi
23487 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23488 V4USI_type_node, NULL_TREE);
23490 tree v4usi_ftype_v4usi_v4usi_v4usi
23491 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23492 V4USI_type_node, V4USI_type_node, NULL_TREE);
23494 tree uti_ftype_udi_udi
23495 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23496 intUDI_type_node, NULL_TREE);
23498 #undef CRYPTO1
23499 #undef CRYPTO2
23500 #undef CRYPTO3
23501 #undef C
23502 #undef N
23503 #undef CF
23504 #undef FT1
23505 #undef FT2
23506 #undef FT3
23508 #define C(U) \
23509 ARM_BUILTIN_CRYPTO_##U
23510 #define N(L) \
23511 "__builtin_arm_crypto_"#L
23512 #define FT1(R, A) \
23513 R##_ftype_##A
23514 #define FT2(R, A1, A2) \
23515 R##_ftype_##A1##_##A2
23516 #define FT3(R, A1, A2, A3) \
23517 R##_ftype_##A1##_##A2##_##A3
23518 #define CRYPTO1(L, U, R, A) \
23519 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23520 C (U), BUILT_IN_MD, \
23521 NULL, NULL_TREE);
23522 #define CRYPTO2(L, U, R, A1, A2) \
23523 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23524 C (U), BUILT_IN_MD, \
23525 NULL, NULL_TREE);
23527 #define CRYPTO3(L, U, R, A1, A2, A3) \
23528 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23529 C (U), BUILT_IN_MD, \
23530 NULL, NULL_TREE);
23531 #include "crypto.def"
23533 #undef CRYPTO1
23534 #undef CRYPTO2
23535 #undef CRYPTO3
23536 #undef C
23537 #undef N
23538 #undef FT1
23539 #undef FT2
23540 #undef FT3
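/* Illustrative sketch of the expansion above (not compiled here; the
   entry shown is only an assumed example, the real ones live in
   crypto.def): a line such as

       CRYPTO2 (aese, AESE, v16uqi, v16uqi, v16uqi)

   expands through the C, N and FT2 macros into roughly

       arm_builtin_decls[ARM_BUILTIN_CRYPTO_AESE]
         = add_builtin_function ("__builtin_arm_crypto_aese",
                                 v16uqi_ftype_v16uqi_v16uqi,
                                 ARM_BUILTIN_CRYPTO_AESE, BUILT_IN_MD,
                                 NULL, NULL_TREE);

   i.e. the function-type variable name is pasted together from the
   return and argument mnemonics declared earlier in this block.  */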
23542 dreg_types[0] = V8QI_type_node;
23543 dreg_types[1] = V4HI_type_node;
23544 dreg_types[2] = V2SI_type_node;
23545 dreg_types[3] = V2SF_type_node;
23546 dreg_types[4] = neon_intDI_type_node;
23548 qreg_types[0] = V16QI_type_node;
23549 qreg_types[1] = V8HI_type_node;
23550 qreg_types[2] = V4SI_type_node;
23551 qreg_types[3] = V4SF_type_node;
23552 qreg_types[4] = V2DI_type_node;
23553 qreg_types[5] = neon_intUTI_type_node;
23555 for (i = 0; i < NUM_QREG_TYPES; i++)
23557 int j;
23558 for (j = 0; j < NUM_QREG_TYPES; j++)
23560 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23561 reinterp_ftype_dreg[i][j]
23562 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23564 reinterp_ftype_qreg[i][j]
23565 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23569 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23570 i < ARRAY_SIZE (neon_builtin_data);
23571 i++, fcode++)
23573 neon_builtin_datum *d = &neon_builtin_data[i];
23575 const char* const modenames[] = {
23576 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23577 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23578 "ti", "ei", "oi"
23580 char namebuf[60];
23581 tree ftype = NULL;
23582 int is_load = 0, is_store = 0;
23584 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23586 d->fcode = fcode;
23588 switch (d->itype)
23590 case NEON_LOAD1:
23591 case NEON_LOAD1LANE:
23592 case NEON_LOADSTRUCT:
23593 case NEON_LOADSTRUCTLANE:
23594 is_load = 1;
23595 /* Fall through. */
23596 case NEON_STORE1:
23597 case NEON_STORE1LANE:
23598 case NEON_STORESTRUCT:
23599 case NEON_STORESTRUCTLANE:
23600 if (!is_load)
23601 is_store = 1;
23602 /* Fall through. */
23603 case NEON_UNOP:
23604 case NEON_RINT:
23605 case NEON_BINOP:
23606 case NEON_LOGICBINOP:
23607 case NEON_SHIFTINSERT:
23608 case NEON_TERNOP:
23609 case NEON_GETLANE:
23610 case NEON_SETLANE:
23611 case NEON_CREATE:
23612 case NEON_DUP:
23613 case NEON_DUPLANE:
23614 case NEON_SHIFTIMM:
23615 case NEON_SHIFTACC:
23616 case NEON_COMBINE:
23617 case NEON_SPLIT:
23618 case NEON_CONVERT:
23619 case NEON_FIXCONV:
23620 case NEON_LANEMUL:
23621 case NEON_LANEMULL:
23622 case NEON_LANEMULH:
23623 case NEON_LANEMAC:
23624 case NEON_SCALARMUL:
23625 case NEON_SCALARMULL:
23626 case NEON_SCALARMULH:
23627 case NEON_SCALARMAC:
23628 case NEON_SELECT:
23629 case NEON_VTBL:
23630 case NEON_VTBX:
23632 int k;
23633 tree return_type = void_type_node, args = void_list_node;
23635 /* Build a function type directly from the insn_data for
23636 this builtin. The build_function_type() function takes
23637 care of removing duplicates for us. */
23638 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23640 tree eltype;
23642 if (is_load && k == 1)
23644 /* Neon load patterns always have the memory
23645 operand in the operand 1 position. */
23646 gcc_assert (insn_data[d->code].operand[k].predicate
23647 == neon_struct_operand);
23649 switch (d->mode)
23651 case T_V8QI:
23652 case T_V16QI:
23653 eltype = const_intQI_pointer_node;
23654 break;
23656 case T_V4HI:
23657 case T_V8HI:
23658 eltype = const_intHI_pointer_node;
23659 break;
23661 case T_V2SI:
23662 case T_V4SI:
23663 eltype = const_intSI_pointer_node;
23664 break;
23666 case T_V2SF:
23667 case T_V4SF:
23668 eltype = const_float_pointer_node;
23669 break;
23671 case T_DI:
23672 case T_V2DI:
23673 eltype = const_intDI_pointer_node;
23674 break;
23676 default: gcc_unreachable ();
23679 else if (is_store && k == 0)
23681 /* Similarly, Neon store patterns use operand 0 as
23682 the memory location to store to. */
23683 gcc_assert (insn_data[d->code].operand[k].predicate
23684 == neon_struct_operand);
23686 switch (d->mode)
23688 case T_V8QI:
23689 case T_V16QI:
23690 eltype = intQI_pointer_node;
23691 break;
23693 case T_V4HI:
23694 case T_V8HI:
23695 eltype = intHI_pointer_node;
23696 break;
23698 case T_V2SI:
23699 case T_V4SI:
23700 eltype = intSI_pointer_node;
23701 break;
23703 case T_V2SF:
23704 case T_V4SF:
23705 eltype = float_pointer_node;
23706 break;
23708 case T_DI:
23709 case T_V2DI:
23710 eltype = intDI_pointer_node;
23711 break;
23713 default: gcc_unreachable ();
23716 else
23718 switch (insn_data[d->code].operand[k].mode)
23720 case VOIDmode: eltype = void_type_node; break;
23721 /* Scalars. */
23722 case QImode: eltype = neon_intQI_type_node; break;
23723 case HImode: eltype = neon_intHI_type_node; break;
23724 case SImode: eltype = neon_intSI_type_node; break;
23725 case SFmode: eltype = neon_float_type_node; break;
23726 case DImode: eltype = neon_intDI_type_node; break;
23727 case TImode: eltype = intTI_type_node; break;
23728 case EImode: eltype = intEI_type_node; break;
23729 case OImode: eltype = intOI_type_node; break;
23730 case CImode: eltype = intCI_type_node; break;
23731 case XImode: eltype = intXI_type_node; break;
23732 /* 64-bit vectors. */
23733 case V8QImode: eltype = V8QI_type_node; break;
23734 case V4HImode: eltype = V4HI_type_node; break;
23735 case V2SImode: eltype = V2SI_type_node; break;
23736 case V2SFmode: eltype = V2SF_type_node; break;
23737 /* 128-bit vectors. */
23738 case V16QImode: eltype = V16QI_type_node; break;
23739 case V8HImode: eltype = V8HI_type_node; break;
23740 case V4SImode: eltype = V4SI_type_node; break;
23741 case V4SFmode: eltype = V4SF_type_node; break;
23742 case V2DImode: eltype = V2DI_type_node; break;
23743 default: gcc_unreachable ();
23747 if (k == 0 && !is_store)
23748 return_type = eltype;
23749 else
23750 args = tree_cons (NULL_TREE, eltype, args);
23753 ftype = build_function_type (return_type, args);
23755 break;
23757 case NEON_RESULTPAIR:
23759 switch (insn_data[d->code].operand[1].mode)
23761 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
23762 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
23763 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
23764 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
23765 case DImode: ftype = void_ftype_pdi_di_di; break;
23766 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
23767 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
23768 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
23769 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
23770 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
23771 default: gcc_unreachable ();
23774 break;
23776 case NEON_REINTERP:
23778 /* We iterate over NUM_DREG_TYPES doubleword types,
23779 then NUM_QREG_TYPES quadword types.
23780 V4HF is not a type used in reinterpret, so we translate
23781 d->mode to the correct index in reinterp_ftype_dreg. */
23782 bool qreg_p
23783 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
23784 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
23785 % NUM_QREG_TYPES;
23786 switch (insn_data[d->code].operand[0].mode)
23788 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
23789 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
23790 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
23791 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
23792 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
23793 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
23794 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
23795 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
23796 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
23797 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
23798 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
23799 default: gcc_unreachable ();
23802 break;
23803 case NEON_FLOAT_WIDEN:
23805 tree eltype = NULL_TREE;
23806 tree return_type = NULL_TREE;
23808 switch (insn_data[d->code].operand[1].mode)
23810 case V4HFmode:
23811 eltype = V4HF_type_node;
23812 return_type = V4SF_type_node;
23813 break;
23814 default: gcc_unreachable ();
23816 ftype = build_function_type_list (return_type, eltype, NULL);
23817 break;
23819 case NEON_FLOAT_NARROW:
23821 tree eltype = NULL_TREE;
23822 tree return_type = NULL_TREE;
23824 switch (insn_data[d->code].operand[1].mode)
23826 case V4SFmode:
23827 eltype = V4SF_type_node;
23828 return_type = V4HF_type_node;
23829 break;
23830 default: gcc_unreachable ();
23832 ftype = build_function_type_list (return_type, eltype, NULL);
23833 break;
23835 default:
23836 gcc_unreachable ();
23839 gcc_assert (ftype != NULL);
23841 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
23843 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
23844 NULL_TREE);
23845 arm_builtin_decls[fcode] = decl;
23849 #undef NUM_DREG_TYPES
23850 #undef NUM_QREG_TYPES
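/* Illustrative sketch of the naming scheme used by the loop above
   (assumed example, not compiled): for a neon_builtin_data entry named
   "vadd" with mode T_V8QI, the loop registers

       add_builtin_function ("__builtin_neon_vaddv8qi", ftype, fcode,
                             BUILT_IN_MD, NULL, NULL_TREE);

   where ftype was built from the insn_data operands of the matching
   pattern and "v8qi" comes from modenames[d->mode].  arm_neon.h then
   wraps builtins like this in the user-visible intrinsics.  */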
23852 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
23853 do \
23855 if ((MASK) & insn_flags) \
23857 tree bdecl; \
23858 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
23859 BUILT_IN_MD, NULL, NULL_TREE); \
23860 arm_builtin_decls[CODE] = bdecl; \
23863 while (0)
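/* Illustrative sketch of the macro above (repeating one of the real
   uses further down purely as an example):

       def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                     ARM_BUILTIN_WZERO);

   registers the builtin only when FL_IWMMXT is set in insn_flags, so
   builtins for extensions the target lacks are never created.  */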
23865 struct builtin_description
23867 const unsigned int mask;
23868 const enum insn_code icode;
23869 const char * const name;
23870 const enum arm_builtins code;
23871 const enum rtx_code comparison;
23872 const unsigned int flag;
23875 static const struct builtin_description bdesc_2arg[] =
23877 #define IWMMXT_BUILTIN(code, string, builtin) \
23878 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
23879 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23881 #define IWMMXT2_BUILTIN(code, string, builtin) \
23882 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
23883 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23885 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
23886 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
23887 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
23888 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
23889 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
23890 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
23891 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
23892 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
23893 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
23894 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
23895 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
23896 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
23897 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
23898 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
23899 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
23900 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
23901 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
23902 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
23903 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
23904 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
23905 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
23906 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
23907 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
23908 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
23909 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
23910 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
23911 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
23912 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
23913 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
23914 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
23915 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
23916 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
23917 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
23918 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
23919 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
23920 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
23921 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
23922 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
23923 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
23924 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
23925 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
23926 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
23927 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
23928 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
23929 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
23930 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
23931 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
23932 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
23933 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
23934 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
23935 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
23936 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
23937 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
23938 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
23939 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
23940 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
23941 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
23942 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
23943 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
23944 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
23945 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
23946 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
23947 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
23948 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
23949 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
23950 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
23951 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
23952 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
23953 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
23954 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
23955 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
23956 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
23957 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
23958 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
23959 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
23960 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
23961 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
23962 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
23964 #define IWMMXT_BUILTIN2(code, builtin) \
23965 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23967 #define IWMMXT2_BUILTIN2(code, builtin) \
23968 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23970 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
23971 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
23972 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
23973 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
23974 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
23975 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
23976 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
23977 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
23978 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
23979 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
23981 #define CRC32_BUILTIN(L, U) \
23982 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
23983 UNKNOWN, 0},
23984 CRC32_BUILTIN (crc32b, CRC32B)
23985 CRC32_BUILTIN (crc32h, CRC32H)
23986 CRC32_BUILTIN (crc32w, CRC32W)
23987 CRC32_BUILTIN (crc32cb, CRC32CB)
23988 CRC32_BUILTIN (crc32ch, CRC32CH)
23989 CRC32_BUILTIN (crc32cw, CRC32CW)
23990 #undef CRC32_BUILTIN
23993 #define CRYPTO_BUILTIN(L, U) \
23994 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
23995 UNKNOWN, 0},
23996 #undef CRYPTO1
23997 #undef CRYPTO2
23998 #undef CRYPTO3
23999 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24000 #define CRYPTO1(L, U, R, A)
24001 #define CRYPTO3(L, U, R, A1, A2, A3)
24002 #include "crypto.def"
24003 #undef CRYPTO1
24004 #undef CRYPTO2
24005 #undef CRYPTO3
24009 static const struct builtin_description bdesc_1arg[] =
24011 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24012 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24013 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24014 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24015 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24016 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24017 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24018 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24019 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24020 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24021 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24022 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24023 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24024 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24025 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24026 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24027 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24028 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24029 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24030 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24031 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24032 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24033 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24034 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24036 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24037 #define CRYPTO2(L, U, R, A1, A2)
24038 #define CRYPTO3(L, U, R, A1, A2, A3)
24039 #include "crypto.def"
24040 #undef CRYPTO1
24041 #undef CRYPTO2
24042 #undef CRYPTO3
24045 static const struct builtin_description bdesc_3arg[] =
24047 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24048 #define CRYPTO1(L, U, R, A)
24049 #define CRYPTO2(L, U, R, A1, A2)
24050 #include "crypto.def"
24051 #undef CRYPTO1
24052 #undef CRYPTO2
24053 #undef CRYPTO3
24055 #undef CRYPTO_BUILTIN
24057 /* Set up all the iWMMXt builtins. This is not called if
24058 TARGET_IWMMXT is zero. */
24060 static void
24061 arm_init_iwmmxt_builtins (void)
24063 const struct builtin_description * d;
24064 size_t i;
24066 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24067 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24068 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24070 tree v8qi_ftype_v8qi_v8qi_int
24071 = build_function_type_list (V8QI_type_node,
24072 V8QI_type_node, V8QI_type_node,
24073 integer_type_node, NULL_TREE);
24074 tree v4hi_ftype_v4hi_int
24075 = build_function_type_list (V4HI_type_node,
24076 V4HI_type_node, integer_type_node, NULL_TREE);
24077 tree v2si_ftype_v2si_int
24078 = build_function_type_list (V2SI_type_node,
24079 V2SI_type_node, integer_type_node, NULL_TREE);
24080 tree v2si_ftype_di_di
24081 = build_function_type_list (V2SI_type_node,
24082 long_long_integer_type_node,
24083 long_long_integer_type_node,
24084 NULL_TREE);
24085 tree di_ftype_di_int
24086 = build_function_type_list (long_long_integer_type_node,
24087 long_long_integer_type_node,
24088 integer_type_node, NULL_TREE);
24089 tree di_ftype_di_int_int
24090 = build_function_type_list (long_long_integer_type_node,
24091 long_long_integer_type_node,
24092 integer_type_node,
24093 integer_type_node, NULL_TREE);
24094 tree int_ftype_v8qi
24095 = build_function_type_list (integer_type_node,
24096 V8QI_type_node, NULL_TREE);
24097 tree int_ftype_v4hi
24098 = build_function_type_list (integer_type_node,
24099 V4HI_type_node, NULL_TREE);
24100 tree int_ftype_v2si
24101 = build_function_type_list (integer_type_node,
24102 V2SI_type_node, NULL_TREE);
24103 tree int_ftype_v8qi_int
24104 = build_function_type_list (integer_type_node,
24105 V8QI_type_node, integer_type_node, NULL_TREE);
24106 tree int_ftype_v4hi_int
24107 = build_function_type_list (integer_type_node,
24108 V4HI_type_node, integer_type_node, NULL_TREE);
24109 tree int_ftype_v2si_int
24110 = build_function_type_list (integer_type_node,
24111 V2SI_type_node, integer_type_node, NULL_TREE);
24112 tree v8qi_ftype_v8qi_int_int
24113 = build_function_type_list (V8QI_type_node,
24114 V8QI_type_node, integer_type_node,
24115 integer_type_node, NULL_TREE);
24116 tree v4hi_ftype_v4hi_int_int
24117 = build_function_type_list (V4HI_type_node,
24118 V4HI_type_node, integer_type_node,
24119 integer_type_node, NULL_TREE);
24120 tree v2si_ftype_v2si_int_int
24121 = build_function_type_list (V2SI_type_node,
24122 V2SI_type_node, integer_type_node,
24123 integer_type_node, NULL_TREE);
24124 /* Miscellaneous. */
24125 tree v8qi_ftype_v4hi_v4hi
24126 = build_function_type_list (V8QI_type_node,
24127 V4HI_type_node, V4HI_type_node, NULL_TREE);
24128 tree v4hi_ftype_v2si_v2si
24129 = build_function_type_list (V4HI_type_node,
24130 V2SI_type_node, V2SI_type_node, NULL_TREE);
24131 tree v8qi_ftype_v4hi_v8qi
24132 = build_function_type_list (V8QI_type_node,
24133 V4HI_type_node, V8QI_type_node, NULL_TREE);
24134 tree v2si_ftype_v4hi_v4hi
24135 = build_function_type_list (V2SI_type_node,
24136 V4HI_type_node, V4HI_type_node, NULL_TREE);
24137 tree v2si_ftype_v8qi_v8qi
24138 = build_function_type_list (V2SI_type_node,
24139 V8QI_type_node, V8QI_type_node, NULL_TREE);
24140 tree v4hi_ftype_v4hi_di
24141 = build_function_type_list (V4HI_type_node,
24142 V4HI_type_node, long_long_integer_type_node,
24143 NULL_TREE);
24144 tree v2si_ftype_v2si_di
24145 = build_function_type_list (V2SI_type_node,
24146 V2SI_type_node, long_long_integer_type_node,
24147 NULL_TREE);
24148 tree di_ftype_void
24149 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24150 tree int_ftype_void
24151 = build_function_type_list (integer_type_node, NULL_TREE);
24152 tree di_ftype_v8qi
24153 = build_function_type_list (long_long_integer_type_node,
24154 V8QI_type_node, NULL_TREE);
24155 tree di_ftype_v4hi
24156 = build_function_type_list (long_long_integer_type_node,
24157 V4HI_type_node, NULL_TREE);
24158 tree di_ftype_v2si
24159 = build_function_type_list (long_long_integer_type_node,
24160 V2SI_type_node, NULL_TREE);
24161 tree v2si_ftype_v4hi
24162 = build_function_type_list (V2SI_type_node,
24163 V4HI_type_node, NULL_TREE);
24164 tree v4hi_ftype_v8qi
24165 = build_function_type_list (V4HI_type_node,
24166 V8QI_type_node, NULL_TREE);
24167 tree v8qi_ftype_v8qi
24168 = build_function_type_list (V8QI_type_node,
24169 V8QI_type_node, NULL_TREE);
24170 tree v4hi_ftype_v4hi
24171 = build_function_type_list (V4HI_type_node,
24172 V4HI_type_node, NULL_TREE);
24173 tree v2si_ftype_v2si
24174 = build_function_type_list (V2SI_type_node,
24175 V2SI_type_node, NULL_TREE);
24177 tree di_ftype_di_v4hi_v4hi
24178 = build_function_type_list (long_long_unsigned_type_node,
24179 long_long_unsigned_type_node,
24180 V4HI_type_node, V4HI_type_node,
24181 NULL_TREE);
24183 tree di_ftype_v4hi_v4hi
24184 = build_function_type_list (long_long_unsigned_type_node,
24185 V4HI_type_node,V4HI_type_node,
24186 NULL_TREE);
24188 tree v2si_ftype_v2si_v4hi_v4hi
24189 = build_function_type_list (V2SI_type_node,
24190 V2SI_type_node, V4HI_type_node,
24191 V4HI_type_node, NULL_TREE);
24193 tree v2si_ftype_v2si_v8qi_v8qi
24194 = build_function_type_list (V2SI_type_node,
24195 V2SI_type_node, V8QI_type_node,
24196 V8QI_type_node, NULL_TREE);
24198 tree di_ftype_di_v2si_v2si
24199 = build_function_type_list (long_long_unsigned_type_node,
24200 long_long_unsigned_type_node,
24201 V2SI_type_node, V2SI_type_node,
24202 NULL_TREE);
24204 tree di_ftype_di_di_int
24205 = build_function_type_list (long_long_unsigned_type_node,
24206 long_long_unsigned_type_node,
24207 long_long_unsigned_type_node,
24208 integer_type_node, NULL_TREE);
24210 tree void_ftype_int
24211 = build_function_type_list (void_type_node,
24212 integer_type_node, NULL_TREE);
24214 tree v8qi_ftype_char
24215 = build_function_type_list (V8QI_type_node,
24216 signed_char_type_node, NULL_TREE);
24218 tree v4hi_ftype_short
24219 = build_function_type_list (V4HI_type_node,
24220 short_integer_type_node, NULL_TREE);
24222 tree v2si_ftype_int
24223 = build_function_type_list (V2SI_type_node,
24224 integer_type_node, NULL_TREE);
24226 /* Normal vector binops. */
24227 tree v8qi_ftype_v8qi_v8qi
24228 = build_function_type_list (V8QI_type_node,
24229 V8QI_type_node, V8QI_type_node, NULL_TREE);
24230 tree v4hi_ftype_v4hi_v4hi
24231 = build_function_type_list (V4HI_type_node,
24232 V4HI_type_node,V4HI_type_node, NULL_TREE);
24233 tree v2si_ftype_v2si_v2si
24234 = build_function_type_list (V2SI_type_node,
24235 V2SI_type_node, V2SI_type_node, NULL_TREE);
24236 tree di_ftype_di_di
24237 = build_function_type_list (long_long_unsigned_type_node,
24238 long_long_unsigned_type_node,
24239 long_long_unsigned_type_node,
24240 NULL_TREE);
24242 /* Add all builtins that are more or less simple operations on two
24243 operands. */
24244 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24246 /* Use one of the operands; the target can have a different mode for
24247 mask-generating compares. */
24248 enum machine_mode mode;
24249 tree type;
24251 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24252 continue;
24254 mode = insn_data[d->icode].operand[1].mode;
24256 switch (mode)
24258 case V8QImode:
24259 type = v8qi_ftype_v8qi_v8qi;
24260 break;
24261 case V4HImode:
24262 type = v4hi_ftype_v4hi_v4hi;
24263 break;
24264 case V2SImode:
24265 type = v2si_ftype_v2si_v2si;
24266 break;
24267 case DImode:
24268 type = di_ftype_di_di;
24269 break;
24271 default:
24272 gcc_unreachable ();
24275 def_mbuiltin (d->mask, d->name, type, d->code);
24278 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
24279 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24280 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24281 ARM_BUILTIN_ ## CODE)
24283 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24284 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24285 ARM_BUILTIN_ ## CODE)
24287 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24288 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24289 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24290 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24291 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24292 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24293 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24294 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24295 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24297 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24298 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24299 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24300 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24301 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24302 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24304 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24305 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24306 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24307 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24308 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24309 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24311 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24312 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24313 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24314 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24315 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24316 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24318 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24319 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24320 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24321 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24322 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24323 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24325 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24327 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24328 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24329 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24330 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24331 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24332 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24333 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24334 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24335 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24336 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24338 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24339 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24340 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24341 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24342 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24343 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24344 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24345 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24346 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24348 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24349 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24350 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24352 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24353 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24354 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24356 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24357 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24359 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24360 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24361 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24362 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24363 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24364 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24366 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24367 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24368 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24369 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24370 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24371 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24372 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24373 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24374 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24375 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24376 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24377 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24379 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24380 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24381 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24382 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24384 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24385 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24386 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24387 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24388 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24389 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24390 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24392 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24393 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24394 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24396 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24397 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24398 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24399 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24401 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24402 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24403 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24404 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24406 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24407 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24408 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24409 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24411 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24412 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24413 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24414 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24416 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24417 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24418 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24419 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24421 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24422 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24423 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24424 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24426 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24428 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24429 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24430 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24432 #undef iwmmx_mbuiltin
24433 #undef iwmmx2_mbuiltin
24436 static void
24437 arm_init_fp16_builtins (void)
24439 tree fp16_type = make_node (REAL_TYPE);
24440 TYPE_PRECISION (fp16_type) = 16;
24441 layout_type (fp16_type);
24442 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
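/* Minimal user-level sketch of the __fp16 type registered above
   (assumes a half-precision format has been selected, for example with
   -mfp16-format=ieee, so that arm_fp16_format is nonzero and this
   function is reached):

       __fp16 h = 1.0f;        -- storage-only 16-bit scalar
       float  f = h + 2.0f;    -- arithmetic is carried out in float

   The promotion to float for arithmetic is arranged by
   arm_promoted_type further down in this file.  */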
24445 static void
24446 arm_init_crc32_builtins ()
24448 tree si_ftype_si_qi
24449 = build_function_type_list (unsigned_intSI_type_node,
24450 unsigned_intSI_type_node,
24451 unsigned_intQI_type_node, NULL_TREE);
24452 tree si_ftype_si_hi
24453 = build_function_type_list (unsigned_intSI_type_node,
24454 unsigned_intSI_type_node,
24455 unsigned_intHI_type_node, NULL_TREE);
24456 tree si_ftype_si_si
24457 = build_function_type_list (unsigned_intSI_type_node,
24458 unsigned_intSI_type_node,
24459 unsigned_intSI_type_node, NULL_TREE);
24461 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24462 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24463 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24464 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24465 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24466 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24467 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24468 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24469 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24470 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24471 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24472 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24473 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24474 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24475 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24476 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24477 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24478 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
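/* Minimal usage sketch for the builtins registered above (assumes a
   target with the CRC32 extension, e.g. -march=armv8-a+crc, so that
   TARGET_CRC32 holds and this function is called):

       unsigned int
       crc32_byte (unsigned int crc, unsigned char data)
       {
         return __builtin_arm_crc32b (crc, data);
       }

   The ACLE header arm_acle.h is expected to wrap these builtins as
   __crc32b and friends.  */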
24481 static void
24482 arm_init_builtins (void)
24484 if (TARGET_REALLY_IWMMXT)
24485 arm_init_iwmmxt_builtins ();
24487 if (TARGET_NEON)
24488 arm_init_neon_builtins ();
24490 if (arm_fp16_format)
24491 arm_init_fp16_builtins ();
24493 if (TARGET_CRC32)
24494 arm_init_crc32_builtins ();
24497 /* Return the ARM builtin for CODE. */
24499 static tree
24500 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24502 if (code >= ARM_BUILTIN_MAX)
24503 return error_mark_node;
24505 return arm_builtin_decls[code];
24508 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24510 static const char *
24511 arm_invalid_parameter_type (const_tree t)
24513 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24514 return N_("function parameters cannot have __fp16 type");
24515 return NULL;
24518 /* Implement TARGET_INVALID_RETURN_TYPE. */
24520 static const char *
24521 arm_invalid_return_type (const_tree t)
24523 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24524 return N_("functions cannot return __fp16 type");
24525 return NULL;
24528 /* Implement TARGET_PROMOTED_TYPE. */
24530 static tree
24531 arm_promoted_type (const_tree t)
24533 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24534 return float_type_node;
24535 return NULL_TREE;
24538 /* Implement TARGET_CONVERT_TO_TYPE.
24539 Specifically, this hook implements the peculiarity of the ARM
24540 half-precision floating-point C semantics that requires conversions between
24541 __fp16 and double to go through an intermediate conversion to float. */
24543 static tree
24544 arm_convert_to_type (tree type, tree expr)
24546 tree fromtype = TREE_TYPE (expr);
24547 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24548 return NULL_TREE;
24549 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24550 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24551 return convert (type, convert (float_type_node, expr));
24552 return NULL_TREE;
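/* Worked example for the hook above (illustrative only): with the
   precision checks as written, converting a double D to __fp16 is
   rewritten as

       (__fp16) (float) D

   and __fp16 to double likewise goes through float first.  Conversions
   between __fp16 and float are left to the default machinery, because
   neither test (precision 16 on one side, more than 32 on the other)
   fires for them.  */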
24555 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24556 This simply adds HFmode as a supported mode; even though we don't
24557 implement arithmetic on this type directly, it's supported by
24558 optabs conversions, much the way the double-word arithmetic is
24559 special-cased in the default hook. */
24561 static bool
24562 arm_scalar_mode_supported_p (enum machine_mode mode)
24564 if (mode == HFmode)
24565 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24566 else if (ALL_FIXED_POINT_MODE_P (mode))
24567 return true;
24568 else
24569 return default_scalar_mode_supported_p (mode);
24572 /* Errors in the source file can cause expand_expr to return const0_rtx
24573 where we expect a vector. To avoid crashing, use one of the vector
24574 clear instructions. */
24576 static rtx
24577 safe_vector_operand (rtx x, enum machine_mode mode)
24579 if (x != const0_rtx)
24580 return x;
24581 x = gen_reg_rtx (mode);
24583 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24584 : gen_rtx_SUBREG (DImode, x, 0)));
24585 return x;
24588 /* Function to expand ternary builtins. */
24589 static rtx
24590 arm_expand_ternop_builtin (enum insn_code icode,
24591 tree exp, rtx target)
24593 rtx pat;
24594 tree arg0 = CALL_EXPR_ARG (exp, 0);
24595 tree arg1 = CALL_EXPR_ARG (exp, 1);
24596 tree arg2 = CALL_EXPR_ARG (exp, 2);
24598 rtx op0 = expand_normal (arg0);
24599 rtx op1 = expand_normal (arg1);
24600 rtx op2 = expand_normal (arg2);
24601 rtx op3 = NULL_RTX;
24603 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24604 lane operand depending on endianness. */
24605 bool builtin_sha1cpm_p = false;
24607 if (insn_data[icode].n_operands == 5)
24609 gcc_assert (icode == CODE_FOR_crypto_sha1c
24610 || icode == CODE_FOR_crypto_sha1p
24611 || icode == CODE_FOR_crypto_sha1m);
24612 builtin_sha1cpm_p = true;
24614 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24615 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24616 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24617 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
24620 if (VECTOR_MODE_P (mode0))
24621 op0 = safe_vector_operand (op0, mode0);
24622 if (VECTOR_MODE_P (mode1))
24623 op1 = safe_vector_operand (op1, mode1);
24624 if (VECTOR_MODE_P (mode2))
24625 op2 = safe_vector_operand (op2, mode2);
24627 if (! target
24628 || GET_MODE (target) != tmode
24629 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24630 target = gen_reg_rtx (tmode);
24632 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24633 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
24634 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
24636 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24637 op0 = copy_to_mode_reg (mode0, op0);
24638 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24639 op1 = copy_to_mode_reg (mode1, op1);
24640 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24641 op2 = copy_to_mode_reg (mode2, op2);
24642 if (builtin_sha1cpm_p)
24643 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24645 if (builtin_sha1cpm_p)
24646 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
24647 else
24648 pat = GEN_FCN (icode) (target, op0, op1, op2);
24649 if (! pat)
24650 return 0;
24651 emit_insn (pat);
24652 return target;
24655 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24657 static rtx
24658 arm_expand_binop_builtin (enum insn_code icode,
24659 tree exp, rtx target)
24661 rtx pat;
24662 tree arg0 = CALL_EXPR_ARG (exp, 0);
24663 tree arg1 = CALL_EXPR_ARG (exp, 1);
24664 rtx op0 = expand_normal (arg0);
24665 rtx op1 = expand_normal (arg1);
24666 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24667 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24668 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24670 if (VECTOR_MODE_P (mode0))
24671 op0 = safe_vector_operand (op0, mode0);
24672 if (VECTOR_MODE_P (mode1))
24673 op1 = safe_vector_operand (op1, mode1);
24675 if (! target
24676 || GET_MODE (target) != tmode
24677 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24678 target = gen_reg_rtx (tmode);
24680 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24681 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24683 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24684 op0 = copy_to_mode_reg (mode0, op0);
24685 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24686 op1 = copy_to_mode_reg (mode1, op1);
24688 pat = GEN_FCN (icode) (target, op0, op1);
24689 if (! pat)
24690 return 0;
24691 emit_insn (pat);
24692 return target;
24695 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24697 static rtx
24698 arm_expand_unop_builtin (enum insn_code icode,
24699 tree exp, rtx target, int do_load)
24701 rtx pat;
24702 tree arg0 = CALL_EXPR_ARG (exp, 0);
24703 rtx op0 = expand_normal (arg0);
24704 rtx op1 = NULL_RTX;
24705 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24706 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24707 bool builtin_sha1h_p = false;
24709 if (insn_data[icode].n_operands == 3)
24711 gcc_assert (icode == CODE_FOR_crypto_sha1h);
24712 builtin_sha1h_p = true;
24715 if (! target
24716 || GET_MODE (target) != tmode
24717 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24718 target = gen_reg_rtx (tmode);
24719 if (do_load)
24720 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
24721 else
24723 if (VECTOR_MODE_P (mode0))
24724 op0 = safe_vector_operand (op0, mode0);
24726 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24727 op0 = copy_to_mode_reg (mode0, op0);
24729 if (builtin_sha1h_p)
24730 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24732 if (builtin_sha1h_p)
24733 pat = GEN_FCN (icode) (target, op0, op1);
24734 else
24735 pat = GEN_FCN (icode) (target, op0);
24736 if (! pat)
24737 return 0;
24738 emit_insn (pat);
24739 return target;
24742 typedef enum {
24743 NEON_ARG_COPY_TO_REG,
24744 NEON_ARG_CONSTANT,
24745 NEON_ARG_MEMORY,
24746 NEON_ARG_STOP
24747 } builtin_arg;
24749 #define NEON_MAX_BUILTIN_ARGS 5
24751 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
24752 and return an expression for the accessed memory.
24754 The intrinsic function operates on a block of registers that has
24755 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
24756 function references the memory at EXP of type TYPE and in mode
24757 MEM_MODE; this mode may be BLKmode if no more suitable mode is
24758 available. */
24760 static tree
24761 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
24762 enum machine_mode reg_mode,
24763 neon_builtin_type_mode type_mode)
24765 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
24766 tree elem_type, upper_bound, array_type;
24768 /* Work out the size of the register block in bytes. */
24769 reg_size = GET_MODE_SIZE (reg_mode);
24771 /* Work out the size of each vector in bytes. */
24772 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
24773 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
24775 /* Work out how many vectors there are. */
24776 gcc_assert (reg_size % vector_size == 0);
24777 nvectors = reg_size / vector_size;
24779 /* Work out the type of each element. */
24780 gcc_assert (POINTER_TYPE_P (type));
24781 elem_type = TREE_TYPE (type);
24783 /* Work out how many elements are being loaded or stored.
24784 MEM_MODE == REG_MODE implies a one-to-one mapping between register
24785 and memory elements; anything else implies a lane load or store. */
24786 if (mem_mode == reg_mode)
24787 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
24788 else
24789 nelems = nvectors;
24791 /* Create a type that describes the full access. */
24792 upper_bound = build_int_cst (size_type_node, nelems - 1);
24793 array_type = build_array_type (elem_type, build_index_type (upper_bound));
24795 /* Dereference EXP using that type. */
24796 return fold_build2 (MEM_REF, array_type, exp,
24797 build_int_cst (build_pointer_type (array_type), 0));
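/* Worked example for the helper above (numbers only, nothing compiled;
   the intrinsic named is an assumed example): for a full structure load
   such as vld2q_s32, REG_MODE is the 32-byte OImode and TYPE_MODE is a
   quadword type, giving

       reg_size    = 32
       vector_size = 16              (TB_QREG)
       nvectors    = 32 / 16 = 2
       nelems      = 16 * 2 / 4 = 8  (MEM_MODE == REG_MODE, 4-byte elements)

   so the access is described as an eight-element array of the
   pointed-to element type at EXP.  */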
24800 /* Expand a Neon builtin. */
24801 static rtx
24802 arm_expand_neon_args (rtx target, int icode, int have_retval,
24803 neon_builtin_type_mode type_mode,
24804 tree exp, int fcode, ...)
24806 va_list ap;
24807 rtx pat;
24808 tree arg[NEON_MAX_BUILTIN_ARGS];
24809 rtx op[NEON_MAX_BUILTIN_ARGS];
24810 tree arg_type;
24811 tree formals;
24812 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24813 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
24814 enum machine_mode other_mode;
24815 int argc = 0;
24816 int opno;
24818 if (have_retval
24819 && (!target
24820 || GET_MODE (target) != tmode
24821 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
24822 target = gen_reg_rtx (tmode);
24824 va_start (ap, fcode);
24826 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
24828 for (;;)
24830 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
24832 if (thisarg == NEON_ARG_STOP)
24833 break;
24834 else
24836 opno = argc + have_retval;
24837 mode[argc] = insn_data[icode].operand[opno].mode;
24838 arg[argc] = CALL_EXPR_ARG (exp, argc);
24839 arg_type = TREE_VALUE (formals);
24840 if (thisarg == NEON_ARG_MEMORY)
24842 other_mode = insn_data[icode].operand[1 - opno].mode;
24843 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
24844 mode[argc], other_mode,
24845 type_mode);
24848 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a MEM_P
24849 is returned. */
24850 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
24851 (thisarg == NEON_ARG_MEMORY
24852 ? EXPAND_MEMORY : EXPAND_NORMAL));
24854 switch (thisarg)
24856 case NEON_ARG_COPY_TO_REG:
24857 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
24858 if (!(*insn_data[icode].operand[opno].predicate)
24859 (op[argc], mode[argc]))
24860 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
24861 break;
24863 case NEON_ARG_CONSTANT:
24864 /* FIXME: This error message is somewhat unhelpful. */
24865 if (!(*insn_data[icode].operand[opno].predicate)
24866 (op[argc], mode[argc]))
24867 error ("argument must be a constant");
24868 break;
24870 case NEON_ARG_MEMORY:
24871 /* Check if expand failed. */
24872 if (op[argc] == const0_rtx)
24873 return 0;
24874 gcc_assert (MEM_P (op[argc]));
24875 PUT_MODE (op[argc], mode[argc]);
24876 /* ??? arm_neon.h uses the same built-in functions for signed
24877 and unsigned accesses, casting where necessary. This isn't
24878 alias safe. */
24879 set_mem_alias_set (op[argc], 0);
24880 if (!(*insn_data[icode].operand[opno].predicate)
24881 (op[argc], mode[argc]))
24882 op[argc] = (replace_equiv_address
24883 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
24884 break;
24886 case NEON_ARG_STOP:
24887 gcc_unreachable ();
24890 argc++;
24891 formals = TREE_CHAIN (formals);
24895 va_end (ap);
24897 if (have_retval)
24898 switch (argc)
24900 case 1:
24901 pat = GEN_FCN (icode) (target, op[0]);
24902 break;
24904 case 2:
24905 pat = GEN_FCN (icode) (target, op[0], op[1]);
24906 break;
24908 case 3:
24909 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
24910 break;
24912 case 4:
24913 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
24914 break;
24916 case 5:
24917 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
24918 break;
24920 default:
24921 gcc_unreachable ();
24923 else
24924 switch (argc)
24926 case 1:
24927 pat = GEN_FCN (icode) (op[0]);
24928 break;
24930 case 2:
24931 pat = GEN_FCN (icode) (op[0], op[1]);
24932 break;
24934 case 3:
24935 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
24936 break;
24938 case 4:
24939 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
24940 break;
24942 case 5:
24943 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
24944 break;
24946 default:
24947 gcc_unreachable ();
24950 if (!pat)
24951 return 0;
24953 emit_insn (pat);
24955 return target;
24958 /* Expand a Neon builtin. These are "special" because they don't have symbolic
24959 constants defined per-instruction or per instruction-variant. Instead, the
24960 required info is looked up in the table neon_builtin_data. */
24961 static rtx
24962 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
24964 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
24965 neon_itype itype = d->itype;
24966 enum insn_code icode = d->code;
24967 neon_builtin_type_mode type_mode = d->mode;
24969 switch (itype)
24971 case NEON_UNOP:
24972 case NEON_CONVERT:
24973 case NEON_DUPLANE:
24974 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24975 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
24977 case NEON_BINOP:
24978 case NEON_SETLANE:
24979 case NEON_SCALARMUL:
24980 case NEON_SCALARMULL:
24981 case NEON_SCALARMULH:
24982 case NEON_SHIFTINSERT:
24983 case NEON_LOGICBINOP:
24984 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24985 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24986 NEON_ARG_STOP);
24988 case NEON_TERNOP:
24989 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24990 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24991 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24993 case NEON_GETLANE:
24994 case NEON_FIXCONV:
24995 case NEON_SHIFTIMM:
24996 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24997 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
24998 NEON_ARG_STOP);
25000 case NEON_CREATE:
25001 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25002 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25004 case NEON_DUP:
25005 case NEON_RINT:
25006 case NEON_SPLIT:
25007 case NEON_FLOAT_WIDEN:
25008 case NEON_FLOAT_NARROW:
25009 case NEON_REINTERP:
25010 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25011 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25013 case NEON_COMBINE:
25014 case NEON_VTBL:
25015 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25016 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25018 case NEON_RESULTPAIR:
25019 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25020 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25021 NEON_ARG_STOP);
25023 case NEON_LANEMUL:
25024 case NEON_LANEMULL:
25025 case NEON_LANEMULH:
25026 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25027 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25028 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25030 case NEON_LANEMAC:
25031 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25032 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25033 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25035 case NEON_SHIFTACC:
25036 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25037 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25038 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25040 case NEON_SCALARMAC:
25041 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25042 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25043 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25045 case NEON_SELECT:
25046 case NEON_VTBX:
25047 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25048 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25049 NEON_ARG_STOP);
25051 case NEON_LOAD1:
25052 case NEON_LOADSTRUCT:
25053 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25054 NEON_ARG_MEMORY, NEON_ARG_STOP);
25056 case NEON_LOAD1LANE:
25057 case NEON_LOADSTRUCTLANE:
25058 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25059 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25060 NEON_ARG_STOP);
25062 case NEON_STORE1:
25063 case NEON_STORESTRUCT:
25064 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25065 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25067 case NEON_STORE1LANE:
25068 case NEON_STORESTRUCTLANE:
25069 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25070 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25071 NEON_ARG_STOP);
25074 gcc_unreachable ();
25077 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25078 void
25079 neon_reinterpret (rtx dest, rtx src)
25081 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25084 /* Emit code to place a Neon pair result in memory locations (with equal
25085 registers). */
25086 void
25087 neon_emit_pair_result_insn (enum machine_mode mode,
25088 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
25089 rtx op1, rtx op2)
25091 rtx mem = gen_rtx_MEM (mode, destaddr);
25092 rtx tmp1 = gen_reg_rtx (mode);
25093 rtx tmp2 = gen_reg_rtx (mode);
25095 emit_insn (intfn (tmp1, op1, op2, tmp2));
25097 emit_move_insn (mem, tmp1);
25098 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
25099 emit_move_insn (mem, tmp2);
25102 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25103 not to early-clobber SRC registers in the process.
25105 We assume that the operands described by SRC and DEST represent a
25106 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25107 number of components into which the copy has been decomposed. */
25108 void
25109 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25111 unsigned int i;
25113 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25114 || REGNO (operands[0]) < REGNO (operands[1]))
25116 for (i = 0; i < count; i++)
25118 operands[2 * i] = dest[i];
25119 operands[2 * i + 1] = src[i];
25122 else
25124 for (i = 0; i < count; i++)
25126 operands[2 * i] = dest[count - i - 1];
25127 operands[2 * i + 1] = src[count - i - 1];
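/* Worked example (hypothetical hard registers): a copy of {d0,d1} into
   {d1,d2} decomposed with COUNT == 2.  The operands overlap and
   REGNO (operands[0]) > REGNO (operands[1]), so the reversed order is used:

     operands[0] = d2, operands[1] = d1   <- emitted first
     operands[2] = d1, operands[3] = d0

   and d1 is therefore read before it is overwritten.  */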
25132 /* Split operands into moves from op[1] + op[2] into op[0]. */
25134 void
25135 neon_split_vcombine (rtx operands[3])
25137 unsigned int dest = REGNO (operands[0]);
25138 unsigned int src1 = REGNO (operands[1]);
25139 unsigned int src2 = REGNO (operands[2]);
25140 enum machine_mode halfmode = GET_MODE (operands[1]);
25141 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25142 rtx destlo, desthi;
25144 if (src1 == dest && src2 == dest + halfregs)
25146 /* No-op move. Can't split to nothing; emit something. */
25147 emit_note (NOTE_INSN_DELETED);
25148 return;
25151 /* Preserve register attributes for variable tracking. */
25152 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25153 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25154 GET_MODE_SIZE (halfmode));
25156 /* Special case of reversed high/low parts. Use VSWP. */
25157 if (src2 == dest && src1 == dest + halfregs)
25159 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25160 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25161 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25162 return;
25165 if (!reg_overlap_mentioned_p (operands[2], destlo))
25167 /* Try to avoid unnecessary moves if part of the result
25168 is in the right place already. */
25169 if (src1 != dest)
25170 emit_move_insn (destlo, operands[1]);
25171 if (src2 != dest + halfregs)
25172 emit_move_insn (desthi, operands[2]);
25174 else
25176 if (src2 != dest + halfregs)
25177 emit_move_insn (desthi, operands[2]);
25178 if (src1 != dest)
25179 emit_move_insn (destlo, operands[1]);
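/* Summary of the three cases handled above (register layout is whatever the
   allocator chose): if both halves are already in place, only a deleted-insn
   note is emitted; if the halves are exactly swapped, a single PARALLEL of
   two SETs is emitted so it can later be output as VSWP; otherwise at most
   two plain moves are emitted, ordered so that a source still needed is not
   clobbered first.  */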
25183 /* Expand an expression EXP that calls a built-in function,
25184 with result going to TARGET if that's convenient
25185 (and in mode MODE if that's convenient).
25186 SUBTARGET may be used as the target for computing one of EXP's operands.
25187 IGNORE is nonzero if the value is to be ignored. */
25189 static rtx
25190 arm_expand_builtin (tree exp,
25191 rtx target,
25192 rtx subtarget ATTRIBUTE_UNUSED,
25193 enum machine_mode mode ATTRIBUTE_UNUSED,
25194 int ignore ATTRIBUTE_UNUSED)
25196 const struct builtin_description * d;
25197 enum insn_code icode;
25198 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25199 tree arg0;
25200 tree arg1;
25201 tree arg2;
25202 rtx op0;
25203 rtx op1;
25204 rtx op2;
25205 rtx pat;
25206 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25207 size_t i;
25208 enum machine_mode tmode;
25209 enum machine_mode mode0;
25210 enum machine_mode mode1;
25211 enum machine_mode mode2;
25212 int opint;
25213 int selector;
25214 int mask;
25215 int imm;
25217 if (fcode >= ARM_BUILTIN_NEON_BASE)
25218 return arm_expand_neon_builtin (fcode, exp, target);
25220 switch (fcode)
25222 case ARM_BUILTIN_TEXTRMSB:
25223 case ARM_BUILTIN_TEXTRMUB:
25224 case ARM_BUILTIN_TEXTRMSH:
25225 case ARM_BUILTIN_TEXTRMUH:
25226 case ARM_BUILTIN_TEXTRMSW:
25227 case ARM_BUILTIN_TEXTRMUW:
25228 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25229 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25230 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25231 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25232 : CODE_FOR_iwmmxt_textrmw);
25234 arg0 = CALL_EXPR_ARG (exp, 0);
25235 arg1 = CALL_EXPR_ARG (exp, 1);
25236 op0 = expand_normal (arg0);
25237 op1 = expand_normal (arg1);
25238 tmode = insn_data[icode].operand[0].mode;
25239 mode0 = insn_data[icode].operand[1].mode;
25240 mode1 = insn_data[icode].operand[2].mode;
25242 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25243 op0 = copy_to_mode_reg (mode0, op0);
25244 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25246 /* @@@ better error message */
25247 error ("selector must be an immediate");
25248 return gen_reg_rtx (tmode);
25251 opint = INTVAL (op1);
25252 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25254 if (opint > 7 || opint < 0)
25255 error ("the range of selector should be in 0 to 7");
25257 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25259 if (opint > 3 || opint < 0)
25260 error ("the range of selector should be in 0 to 3");
25262 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25264 if (opint > 1 || opint < 0)
25265 error ("the range of selector should be in 0 to 1");
25268 if (target == 0
25269 || GET_MODE (target) != tmode
25270 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25271 target = gen_reg_rtx (tmode);
25272 pat = GEN_FCN (icode) (target, op0, op1);
25273 if (! pat)
25274 return 0;
25275 emit_insn (pat);
25276 return target;
25278 case ARM_BUILTIN_WALIGNI:
25279 /* If op2 is immediate, call waligni, else call walignr. */
25280 arg0 = CALL_EXPR_ARG (exp, 0);
25281 arg1 = CALL_EXPR_ARG (exp, 1);
25282 arg2 = CALL_EXPR_ARG (exp, 2);
25283 op0 = expand_normal (arg0);
25284 op1 = expand_normal (arg1);
25285 op2 = expand_normal (arg2);
25286 if (CONST_INT_P (op2))
25288 icode = CODE_FOR_iwmmxt_waligni;
25289 tmode = insn_data[icode].operand[0].mode;
25290 mode0 = insn_data[icode].operand[1].mode;
25291 mode1 = insn_data[icode].operand[2].mode;
25292 mode2 = insn_data[icode].operand[3].mode;
25293 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25294 op0 = copy_to_mode_reg (mode0, op0);
25295 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25296 op1 = copy_to_mode_reg (mode1, op1);
25297 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25298 selector = INTVAL (op2);
25299 if (selector > 7 || selector < 0)
25300 error ("the range of selector should be in 0 to 7");
25302 else
25304 icode = CODE_FOR_iwmmxt_walignr;
25305 tmode = insn_data[icode].operand[0].mode;
25306 mode0 = insn_data[icode].operand[1].mode;
25307 mode1 = insn_data[icode].operand[2].mode;
25308 mode2 = insn_data[icode].operand[3].mode;
25309 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25310 op0 = copy_to_mode_reg (mode0, op0);
25311 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25312 op1 = copy_to_mode_reg (mode1, op1);
25313 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25314 op2 = copy_to_mode_reg (mode2, op2);
25316 if (target == 0
25317 || GET_MODE (target) != tmode
25318 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25319 target = gen_reg_rtx (tmode);
25320 pat = GEN_FCN (icode) (target, op0, op1, op2);
25321 if (!pat)
25322 return 0;
25323 emit_insn (pat);
25324 return target;
25326 case ARM_BUILTIN_TINSRB:
25327 case ARM_BUILTIN_TINSRH:
25328 case ARM_BUILTIN_TINSRW:
25329 case ARM_BUILTIN_WMERGE:
25330 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25331 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25332 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25333 : CODE_FOR_iwmmxt_tinsrw);
25334 arg0 = CALL_EXPR_ARG (exp, 0);
25335 arg1 = CALL_EXPR_ARG (exp, 1);
25336 arg2 = CALL_EXPR_ARG (exp, 2);
25337 op0 = expand_normal (arg0);
25338 op1 = expand_normal (arg1);
25339 op2 = expand_normal (arg2);
25340 tmode = insn_data[icode].operand[0].mode;
25341 mode0 = insn_data[icode].operand[1].mode;
25342 mode1 = insn_data[icode].operand[2].mode;
25343 mode2 = insn_data[icode].operand[3].mode;
25345 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25346 op0 = copy_to_mode_reg (mode0, op0);
25347 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25348 op1 = copy_to_mode_reg (mode1, op1);
25349 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25351 error ("selector must be an immediate");
25352 return const0_rtx;
25354 if (icode == CODE_FOR_iwmmxt_wmerge)
25356 selector = INTVAL (op2);
25357 if (selector > 7 || selector < 0)
25358 error ("the range of selector should be in 0 to 7");
25360 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25361 || (icode == CODE_FOR_iwmmxt_tinsrh)
25362 || (icode == CODE_FOR_iwmmxt_tinsrw))
25364 mask = 0x01;
25365 selector = INTVAL (op2);
25366 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25367 error ("the range of selector should be in 0 to 7");
25368 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25369 error ("the range of selector should be in 0 to 3");
25370 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25371 error ("the range of selector should be in 0 to 1");
25372 mask <<= selector;
25373 op2 = GEN_INT (mask);
25375 if (target == 0
25376 || GET_MODE (target) != tmode
25377 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25378 target = gen_reg_rtx (tmode);
25379 pat = GEN_FCN (icode) (target, op0, op1, op2);
25380 if (! pat)
25381 return 0;
25382 emit_insn (pat);
25383 return target;
25385 case ARM_BUILTIN_SETWCGR0:
25386 case ARM_BUILTIN_SETWCGR1:
25387 case ARM_BUILTIN_SETWCGR2:
25388 case ARM_BUILTIN_SETWCGR3:
25389 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25390 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25391 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25392 : CODE_FOR_iwmmxt_setwcgr3);
25393 arg0 = CALL_EXPR_ARG (exp, 0);
25394 op0 = expand_normal (arg0);
25395 mode0 = insn_data[icode].operand[0].mode;
25396 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25397 op0 = copy_to_mode_reg (mode0, op0);
25398 pat = GEN_FCN (icode) (op0);
25399 if (!pat)
25400 return 0;
25401 emit_insn (pat);
25402 return 0;
25404 case ARM_BUILTIN_GETWCGR0:
25405 case ARM_BUILTIN_GETWCGR1:
25406 case ARM_BUILTIN_GETWCGR2:
25407 case ARM_BUILTIN_GETWCGR3:
25408 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25409 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25410 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25411 : CODE_FOR_iwmmxt_getwcgr3);
25412 tmode = insn_data[icode].operand[0].mode;
25413 if (target == 0
25414 || GET_MODE (target) != tmode
25415 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25416 target = gen_reg_rtx (tmode);
25417 pat = GEN_FCN (icode) (target);
25418 if (!pat)
25419 return 0;
25420 emit_insn (pat);
25421 return target;
25423 case ARM_BUILTIN_WSHUFH:
25424 icode = CODE_FOR_iwmmxt_wshufh;
25425 arg0 = CALL_EXPR_ARG (exp, 0);
25426 arg1 = CALL_EXPR_ARG (exp, 1);
25427 op0 = expand_normal (arg0);
25428 op1 = expand_normal (arg1);
25429 tmode = insn_data[icode].operand[0].mode;
25430 mode1 = insn_data[icode].operand[1].mode;
25431 mode2 = insn_data[icode].operand[2].mode;
25433 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25434 op0 = copy_to_mode_reg (mode1, op0);
25435 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25437 error ("mask must be an immediate");
25438 return const0_rtx;
25440 selector = INTVAL (op1);
25441 if (selector < 0 || selector > 255)
25442 error ("the range of mask should be in 0 to 255");
25443 if (target == 0
25444 || GET_MODE (target) != tmode
25445 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25446 target = gen_reg_rtx (tmode);
25447 pat = GEN_FCN (icode) (target, op0, op1);
25448 if (! pat)
25449 return 0;
25450 emit_insn (pat);
25451 return target;
25453 case ARM_BUILTIN_WMADDS:
25454 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25455 case ARM_BUILTIN_WMADDSX:
25456 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25457 case ARM_BUILTIN_WMADDSN:
25458 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25459 case ARM_BUILTIN_WMADDU:
25460 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25461 case ARM_BUILTIN_WMADDUX:
25462 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25463 case ARM_BUILTIN_WMADDUN:
25464 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25465 case ARM_BUILTIN_WSADBZ:
25466 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25467 case ARM_BUILTIN_WSADHZ:
25468 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25470 /* Several three-argument builtins. */
25471 case ARM_BUILTIN_WMACS:
25472 case ARM_BUILTIN_WMACU:
25473 case ARM_BUILTIN_TMIA:
25474 case ARM_BUILTIN_TMIAPH:
25475 case ARM_BUILTIN_TMIATT:
25476 case ARM_BUILTIN_TMIATB:
25477 case ARM_BUILTIN_TMIABT:
25478 case ARM_BUILTIN_TMIABB:
25479 case ARM_BUILTIN_WQMIABB:
25480 case ARM_BUILTIN_WQMIABT:
25481 case ARM_BUILTIN_WQMIATB:
25482 case ARM_BUILTIN_WQMIATT:
25483 case ARM_BUILTIN_WQMIABBN:
25484 case ARM_BUILTIN_WQMIABTN:
25485 case ARM_BUILTIN_WQMIATBN:
25486 case ARM_BUILTIN_WQMIATTN:
25487 case ARM_BUILTIN_WMIABB:
25488 case ARM_BUILTIN_WMIABT:
25489 case ARM_BUILTIN_WMIATB:
25490 case ARM_BUILTIN_WMIATT:
25491 case ARM_BUILTIN_WMIABBN:
25492 case ARM_BUILTIN_WMIABTN:
25493 case ARM_BUILTIN_WMIATBN:
25494 case ARM_BUILTIN_WMIATTN:
25495 case ARM_BUILTIN_WMIAWBB:
25496 case ARM_BUILTIN_WMIAWBT:
25497 case ARM_BUILTIN_WMIAWTB:
25498 case ARM_BUILTIN_WMIAWTT:
25499 case ARM_BUILTIN_WMIAWBBN:
25500 case ARM_BUILTIN_WMIAWBTN:
25501 case ARM_BUILTIN_WMIAWTBN:
25502 case ARM_BUILTIN_WMIAWTTN:
25503 case ARM_BUILTIN_WSADB:
25504 case ARM_BUILTIN_WSADH:
25505 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25506 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25507 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25508 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25509 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25510 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25511 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25512 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25513 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25514 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25515 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25516 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25517 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25518 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25519 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25520 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25521 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25522 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25523 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25524 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25525 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25526 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25527 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25528 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25529 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25530 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25531 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25532 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25533 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25534 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25535 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25536 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25537 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25538 : CODE_FOR_iwmmxt_wsadh);
25539 arg0 = CALL_EXPR_ARG (exp, 0);
25540 arg1 = CALL_EXPR_ARG (exp, 1);
25541 arg2 = CALL_EXPR_ARG (exp, 2);
25542 op0 = expand_normal (arg0);
25543 op1 = expand_normal (arg1);
25544 op2 = expand_normal (arg2);
25545 tmode = insn_data[icode].operand[0].mode;
25546 mode0 = insn_data[icode].operand[1].mode;
25547 mode1 = insn_data[icode].operand[2].mode;
25548 mode2 = insn_data[icode].operand[3].mode;
25550 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25551 op0 = copy_to_mode_reg (mode0, op0);
25552 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25553 op1 = copy_to_mode_reg (mode1, op1);
25554 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25555 op2 = copy_to_mode_reg (mode2, op2);
25556 if (target == 0
25557 || GET_MODE (target) != tmode
25558 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25559 target = gen_reg_rtx (tmode);
25560 pat = GEN_FCN (icode) (target, op0, op1, op2);
25561 if (! pat)
25562 return 0;
25563 emit_insn (pat);
25564 return target;
25566 case ARM_BUILTIN_WZERO:
25567 target = gen_reg_rtx (DImode);
25568 emit_insn (gen_iwmmxt_clrdi (target));
25569 return target;
25571 case ARM_BUILTIN_WSRLHI:
25572 case ARM_BUILTIN_WSRLWI:
25573 case ARM_BUILTIN_WSRLDI:
25574 case ARM_BUILTIN_WSLLHI:
25575 case ARM_BUILTIN_WSLLWI:
25576 case ARM_BUILTIN_WSLLDI:
25577 case ARM_BUILTIN_WSRAHI:
25578 case ARM_BUILTIN_WSRAWI:
25579 case ARM_BUILTIN_WSRADI:
25580 case ARM_BUILTIN_WRORHI:
25581 case ARM_BUILTIN_WRORWI:
25582 case ARM_BUILTIN_WRORDI:
25583 case ARM_BUILTIN_WSRLH:
25584 case ARM_BUILTIN_WSRLW:
25585 case ARM_BUILTIN_WSRLD:
25586 case ARM_BUILTIN_WSLLH:
25587 case ARM_BUILTIN_WSLLW:
25588 case ARM_BUILTIN_WSLLD:
25589 case ARM_BUILTIN_WSRAH:
25590 case ARM_BUILTIN_WSRAW:
25591 case ARM_BUILTIN_WSRAD:
25592 case ARM_BUILTIN_WRORH:
25593 case ARM_BUILTIN_WRORW:
25594 case ARM_BUILTIN_WRORD:
25595 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
25596 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
25597 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
25598 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
25599 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
25600 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
25601 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
25602 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
25603 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
25604 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
25605 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
25606 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
25607 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
25608 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
25609 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
25610 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
25611 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
25612 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25613 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25614 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25615 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25616 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25617 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25618 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25619 : CODE_FOR_nothing);
25620 arg1 = CALL_EXPR_ARG (exp, 1);
25621 op1 = expand_normal (arg1);
25622 if (GET_MODE (op1) == VOIDmode)
25624 imm = INTVAL (op1);
25625 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25626 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25627 && (imm < 0 || imm > 32))
25629 if (fcode == ARM_BUILTIN_WRORHI)
25630 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25631 else if (fcode == ARM_BUILTIN_WRORWI)
25632 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25633 else if (fcode == ARM_BUILTIN_WRORH)
25634 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25635 else
25636 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25638 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25639 && (imm < 0 || imm > 64))
25641 if (fcode == ARM_BUILTIN_WRORDI)
25642 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25643 else
25644 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25646 else if (imm < 0)
25648 if (fcode == ARM_BUILTIN_WSRLHI)
25649 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25650 else if (fcode == ARM_BUILTIN_WSRLWI)
25651 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25652 else if (fcode == ARM_BUILTIN_WSRLDI)
25653 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25654 else if (fcode == ARM_BUILTIN_WSLLHI)
25655 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25656 else if (fcode == ARM_BUILTIN_WSLLWI)
25657 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25658 else if (fcode == ARM_BUILTIN_WSLLDI)
25659 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25660 else if (fcode == ARM_BUILTIN_WSRAHI)
25661 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25662 else if (fcode == ARM_BUILTIN_WSRAWI)
25663 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25664 else if (fcode == ARM_BUILTIN_WSRADI)
25665 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25666 else if (fcode == ARM_BUILTIN_WSRLH)
25667 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25668 else if (fcode == ARM_BUILTIN_WSRLW)
25669 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25670 else if (fcode == ARM_BUILTIN_WSRLD)
25671 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25672 else if (fcode == ARM_BUILTIN_WSLLH)
25673 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25674 else if (fcode == ARM_BUILTIN_WSLLW)
25675 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25676 else if (fcode == ARM_BUILTIN_WSLLD)
25677 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25678 else if (fcode == ARM_BUILTIN_WSRAH)
25679 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25680 else if (fcode == ARM_BUILTIN_WSRAW)
25681 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25682 else
25683 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25686 return arm_expand_binop_builtin (icode, exp, target);
25688 default:
25689 break;
25692 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25693 if (d->code == (const enum arm_builtins) fcode)
25694 return arm_expand_binop_builtin (d->icode, exp, target);
25696 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
25697 if (d->code == (const enum arm_builtins) fcode)
25698 return arm_expand_unop_builtin (d->icode, exp, target, 0);
25700 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
25701 if (d->code == (const enum arm_builtins) fcode)
25702 return arm_expand_ternop_builtin (d->icode, exp, target);
25704 /* @@@ Should really do something sensible here. */
25705 return NULL_RTX;
25708 /* Return the number (counting from 0) of
25709 the least significant set bit in MASK. */
25711 inline static int
25712 number_of_first_bit_set (unsigned mask)
25714 return ctz_hwi (mask);
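/* For example, number_of_first_bit_set (0x14) == 2, since bit 2 is the
   lowest bit set in 0b10100.  */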
25717 /* Like emit_multi_reg_push, but allowing for a different set of
25718 registers to be described as saved. MASK is the set of registers
25719 to be saved; REAL_REGS is the set of registers to be described as
25720 saved. If REAL_REGS is 0, only describe the stack adjustment. */
25722 static rtx
25723 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25725 unsigned long regno;
25726 rtx par[10], tmp, reg, insn;
25727 int i, j;
25729 /* Build the parallel of the registers actually being stored. */
25730 for (i = 0; mask; ++i, mask &= mask - 1)
25732 regno = ctz_hwi (mask);
25733 reg = gen_rtx_REG (SImode, regno);
25735 if (i == 0)
25736 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25737 else
25738 tmp = gen_rtx_USE (VOIDmode, reg);
25740 par[i] = tmp;
25743 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25744 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25745 tmp = gen_frame_mem (BLKmode, tmp);
25746 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
25747 par[0] = tmp;
25749 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25750 insn = emit_insn (tmp);
25752 /* Always build the stack adjustment note for unwind info. */
25753 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25754 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
25755 par[0] = tmp;
25757 /* Build the parallel of the registers recorded as saved for unwind. */
25758 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25760 regno = ctz_hwi (real_regs);
25761 reg = gen_rtx_REG (SImode, regno);
25763 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25764 tmp = gen_frame_mem (SImode, tmp);
25765 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
25766 RTX_FRAME_RELATED_P (tmp) = 1;
25767 par[j + 1] = tmp;
25770 if (j == 0)
25771 tmp = par[0];
25772 else
25774 RTX_FRAME_RELATED_P (par[0]) = 1;
25775 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25778 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25780 return insn;
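/* Example of the MASK/REAL_REGS distinction (register numbers purely
   illustrative): when high registers are staged through low registers, the
   prologue may push {r4, r5} while the unwind information must describe the
   save as {r8, r9}; in that case MASK is (1 << 4) | (1 << 5) and REAL_REGS
   is (1 << 8) | (1 << 9).  */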
25783 /* Emit code to push or pop registers to or from the stack. F is the
25784 assembly file. MASK is the registers to pop. */
25785 static void
25786 thumb_pop (FILE *f, unsigned long mask)
25788 int regno;
25789 int lo_mask = mask & 0xFF;
25790 int pushed_words = 0;
25792 gcc_assert (mask);
25794 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25796 /* Special case. Do not generate a POP PC statement here, do it in
25797 thumb_exit (). */
25798 thumb_exit (f, -1);
25799 return;
25802 fprintf (f, "\tpop\t{");
25804 /* Look at the low registers first. */
25805 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25807 if (lo_mask & 1)
25809 asm_fprintf (f, "%r", regno);
25811 if ((lo_mask & ~1) != 0)
25812 fprintf (f, ", ");
25814 pushed_words++;
25818 if (mask & (1 << PC_REGNUM))
25820 /* Catch popping the PC. */
25821 if (TARGET_INTERWORK || TARGET_BACKTRACE
25822 || crtl->calls_eh_return)
25824 /* The PC is never popped directly; instead
25825 it is popped into r3 and then BX is used. */
25826 fprintf (f, "}\n");
25828 thumb_exit (f, -1);
25830 return;
25832 else
25834 if (mask & 0xFF)
25835 fprintf (f, ", ");
25837 asm_fprintf (f, "%r", PC_REGNUM);
25841 fprintf (f, "}\n");
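/* For instance, a MASK of (1 << 4) | (1 << 5) | (1 << PC_REGNUM) emits
   "pop {r4, r5, pc}" when neither interworking nor a backtrace structure is
   in use; otherwise the low registers are popped and the PC is returned via
   thumb_exit so that BX can be used.  */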
25844 /* Generate code to return from a thumb function.
25845 If 'reg_containing_return_addr' is -1, then the return address is
25846 actually on the stack, at the stack pointer. */
25847 static void
25848 thumb_exit (FILE *f, int reg_containing_return_addr)
25850 unsigned regs_available_for_popping;
25851 unsigned regs_to_pop;
25852 int pops_needed;
25853 unsigned available;
25854 unsigned required;
25855 int mode;
25856 int size;
25857 int restore_a4 = FALSE;
25859 /* Compute the registers we need to pop. */
25860 regs_to_pop = 0;
25861 pops_needed = 0;
25863 if (reg_containing_return_addr == -1)
25865 regs_to_pop |= 1 << LR_REGNUM;
25866 ++pops_needed;
25869 if (TARGET_BACKTRACE)
25871 /* Restore the (ARM) frame pointer and stack pointer. */
25872 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25873 pops_needed += 2;
25876 /* If there is nothing to pop then just emit the BX instruction and
25877 return. */
25878 if (pops_needed == 0)
25880 if (crtl->calls_eh_return)
25881 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25883 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25884 return;
25886 /* Otherwise, if we are not supporting interworking, have not created
25887 a backtrace structure, and the function was not entered in ARM mode,
25888 then just pop the return address straight into the PC. */
25889 else if (!TARGET_INTERWORK
25890 && !TARGET_BACKTRACE
25891 && !is_called_in_ARM_mode (current_function_decl)
25892 && !crtl->calls_eh_return)
25894 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25895 return;
25898 /* Find out how many of the (return) argument registers we can corrupt. */
25899 regs_available_for_popping = 0;
25901 /* If returning via __builtin_eh_return, the bottom three registers
25902 all contain information needed for the return. */
25903 if (crtl->calls_eh_return)
25904 size = 12;
25905 else
25907 /* We can deduce the registers used from the function's
25908 return value. This is more reliable than examining
25909 df_regs_ever_live_p () because that will be set if the register is
25910 ever used in the function, not just if the register is used
25911 to hold a return value. */
25913 if (crtl->return_rtx != 0)
25914 mode = GET_MODE (crtl->return_rtx);
25915 else
25916 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25918 size = GET_MODE_SIZE (mode);
25920 if (size == 0)
25922 /* In a void function we can use any argument register.
25923 In a function that returns a structure on the stack
25924 we can use the second and third argument registers. */
25925 if (mode == VOIDmode)
25926 regs_available_for_popping =
25927 (1 << ARG_REGISTER (1))
25928 | (1 << ARG_REGISTER (2))
25929 | (1 << ARG_REGISTER (3));
25930 else
25931 regs_available_for_popping =
25932 (1 << ARG_REGISTER (2))
25933 | (1 << ARG_REGISTER (3));
25935 else if (size <= 4)
25936 regs_available_for_popping =
25937 (1 << ARG_REGISTER (2))
25938 | (1 << ARG_REGISTER (3));
25939 else if (size <= 8)
25940 regs_available_for_popping =
25941 (1 << ARG_REGISTER (3));
25944 /* Match registers to be popped with registers into which we pop them. */
25945 for (available = regs_available_for_popping,
25946 required = regs_to_pop;
25947 required != 0 && available != 0;
25948 available &= ~(available & - available),
25949 required &= ~(required & - required))
25950 -- pops_needed;
25952 /* If we have any popping registers left over, remove them. */
25953 if (available > 0)
25954 regs_available_for_popping &= ~available;
25956 /* Otherwise if we need another popping register we can use
25957 the fourth argument register. */
25958 else if (pops_needed)
25960 /* If we have not found any free argument registers and
25961 reg a4 contains the return address, we must move it. */
25962 if (regs_available_for_popping == 0
25963 && reg_containing_return_addr == LAST_ARG_REGNUM)
25965 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25966 reg_containing_return_addr = LR_REGNUM;
25968 else if (size > 12)
25970 /* Register a4 is being used to hold part of the return value,
25971 but we have dire need of a free, low register. */
25972 restore_a4 = TRUE;
25974 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
25977 if (reg_containing_return_addr != LAST_ARG_REGNUM)
25979 /* The fourth argument register is available. */
25980 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25982 --pops_needed;
25986 /* Pop as many registers as we can. */
25987 thumb_pop (f, regs_available_for_popping);
25989 /* Process the registers we popped. */
25990 if (reg_containing_return_addr == -1)
25992 /* The return address was popped into the lowest numbered register. */
25993 regs_to_pop &= ~(1 << LR_REGNUM);
25995 reg_containing_return_addr =
25996 number_of_first_bit_set (regs_available_for_popping);
25998 /* Remove this register from the mask of available registers, so that
25999 the return address will not be corrupted by further pops. */
26000 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26003 /* If we popped other registers then handle them here. */
26004 if (regs_available_for_popping)
26006 int frame_pointer;
26008 /* Work out which register currently contains the frame pointer. */
26009 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26011 /* Move it into the correct place. */
26012 asm_fprintf (f, "\tmov\t%r, %r\n",
26013 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26015 /* (Temporarily) remove it from the mask of popped registers. */
26016 regs_available_for_popping &= ~(1 << frame_pointer);
26017 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26019 if (regs_available_for_popping)
26021 int stack_pointer;
26023 /* We popped the stack pointer as well,
26024 find the register that contains it. */
26025 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26027 /* Move it into the stack register. */
26028 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26030 /* At this point we have popped all necessary registers, so
26031 do not worry about restoring regs_available_for_popping
26032 to its correct value:
26034 assert (pops_needed == 0)
26035 assert (regs_available_for_popping == (1 << frame_pointer))
26036 assert (regs_to_pop == (1 << STACK_POINTER)) */
26038 else
26040 /* Since we have just moved the popped value into the frame
26041 pointer, the popping register is available for reuse, and
26042 we know that we still have the stack pointer left to pop. */
26043 regs_available_for_popping |= (1 << frame_pointer);
26047 /* If we still have registers left on the stack, but we no longer have
26048 any registers into which we can pop them, then we must move the return
26049 address into the link register and make available the register that
26050 contained it. */
26051 if (regs_available_for_popping == 0 && pops_needed > 0)
26053 regs_available_for_popping |= 1 << reg_containing_return_addr;
26055 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26056 reg_containing_return_addr);
26058 reg_containing_return_addr = LR_REGNUM;
26061 /* If we have registers left on the stack then pop some more.
26062 We know that at most we will want to pop FP and SP. */
26063 if (pops_needed > 0)
26065 int popped_into;
26066 int move_to;
26068 thumb_pop (f, regs_available_for_popping);
26070 /* We have popped either FP or SP.
26071 Move whichever one it is into the correct register. */
26072 popped_into = number_of_first_bit_set (regs_available_for_popping);
26073 move_to = number_of_first_bit_set (regs_to_pop);
26075 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26077 regs_to_pop &= ~(1 << move_to);
26079 --pops_needed;
26082 /* If we still have not popped everything then we must have only
26083 had one register available to us and we are now popping the SP. */
26084 if (pops_needed > 0)
26086 int popped_into;
26088 thumb_pop (f, regs_available_for_popping);
26090 popped_into = number_of_first_bit_set (regs_available_for_popping);
26092 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26094 assert (regs_to_pop == (1 << STACK_POINTER))
26095 assert (pops_needed == 1)
26099 /* If necessary restore the a4 register. */
26100 if (restore_a4)
26102 if (reg_containing_return_addr != LR_REGNUM)
26104 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26105 reg_containing_return_addr = LR_REGNUM;
26108 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26111 if (crtl->calls_eh_return)
26112 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26114 /* Return to caller. */
26115 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
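/* Simple illustration: for a function compiled without interworking or
   backtrace support and not entered in ARM mode, a call with
   reg_containing_return_addr == -1 reduces to a single "pop {pc}"; the more
   involved paths above only trigger when argument registers have to be
   recycled to restore FP and SP.  */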
26118 /* Scan INSN just before assembler is output for it.
26119 For Thumb-1, we track the status of the condition codes; this
26120 information is used in the cbranchsi4_insn pattern. */
26121 void
26122 thumb1_final_prescan_insn (rtx insn)
26124 if (flag_print_asm_name)
26125 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26126 INSN_ADDRESSES (INSN_UID (insn)));
26127 /* Don't overwrite the previous setter when we get to a cbranch. */
26128 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26130 enum attr_conds conds;
26132 if (cfun->machine->thumb1_cc_insn)
26134 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26135 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26136 CC_STATUS_INIT;
26138 conds = get_attr_conds (insn);
26139 if (conds == CONDS_SET)
26141 rtx set = single_set (insn);
26142 cfun->machine->thumb1_cc_insn = insn;
26143 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26144 cfun->machine->thumb1_cc_op1 = const0_rtx;
26145 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26146 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26148 rtx src1 = XEXP (SET_SRC (set), 1);
26149 if (src1 == const0_rtx)
26150 cfun->machine->thumb1_cc_mode = CCmode;
26152 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26154 /* Record the src register operand instead of dest because
26155 the cprop_hardreg pass propagates src. */
26156 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26159 else if (conds != CONDS_NOCOND)
26160 cfun->machine->thumb1_cc_insn = NULL_RTX;
26163 /* Check whether an unexpected far jump is used. */
26164 if (cfun->machine->lr_save_eliminated
26165 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26166 internal_error ("Unexpected thumb1 far jump");
26170 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26172 unsigned HOST_WIDE_INT mask = 0xff;
26173 int i;
26175 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26176 if (val == 0) /* XXX */
26177 return 0;
26179 for (i = 0; i < 25; i++)
26180 if ((val & (mask << i)) == val)
26181 return 1;
26183 return 0;
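/* Example: 0x0003FC00 is accepted because it equals 0xff << 10, i.e. all of
   its set bits fit in one contiguous 8-bit window; 0x0F0000F0 is rejected
   because no single such window covers bits 4-7 and 24-27 at once.  */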
26186 /* Returns nonzero if the current function contains,
26187 or might contain a far jump. */
26188 static int
26189 thumb_far_jump_used_p (void)
26191 rtx insn;
26192 bool far_jump = false;
26193 unsigned int func_size = 0;
26195 /* This test is only important for leaf functions. */
26196 /* assert (!leaf_function_p ()); */
26198 /* If we have already decided that far jumps may be used,
26199 do not bother checking again, and always return true even if
26200 it turns out that they are not being used. Once we have made
26201 the decision that far jumps are present (and that hence the link
26202 register will be pushed onto the stack) we cannot go back on it. */
26203 if (cfun->machine->far_jump_used)
26204 return 1;
26206 /* If this function is not being called from the prologue/epilogue
26207 generation code then it must be being called from the
26208 INITIAL_ELIMINATION_OFFSET macro. */
26209 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26211 /* In this case we know that we are being asked about the elimination
26212 of the arg pointer register. If that register is not being used,
26213 then there are no arguments on the stack, and we do not have to
26214 worry that a far jump might force the prologue to push the link
26215 register, changing the stack offsets. In this case we can just
26216 return false, since the presence of far jumps in the function will
26217 not affect stack offsets.
26219 If the arg pointer is live (or if it was live, but has now been
26220 eliminated and so set to dead) then we do have to test to see if
26221 the function might contain a far jump. This test can lead to some
26222 false negatives, since before reload is completed, the length of
26223 branch instructions is not known, so gcc defaults to returning their
26224 longest length, which in turn sets the far jump attribute to true.
26226 A false negative will not result in bad code being generated, but it
26227 will result in a needless push and pop of the link register. We
26228 hope that this does not occur too often.
26230 If we need doubleword stack alignment this could affect the other
26231 elimination offsets so we can't risk getting it wrong. */
26232 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26233 cfun->machine->arg_pointer_live = 1;
26234 else if (!cfun->machine->arg_pointer_live)
26235 return 0;
26238 /* Check to see if the function contains a branch
26239 insn with the far jump attribute set. */
26240 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26242 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26244 far_jump = true;
26246 func_size += get_attr_length (insn);
26249 /* The far_jump attribute is always true for Thumb-1 before the
26250 branch-shortening pass, so checking it before that pass
26251 is of little use.
26253 The following heuristic tries to estimate more accurately whether a far
26254 jump will actually be needed. It is very conservative, because there is
26255 no way to roll back a decision not to use a far jump.
26257 A Thumb-1 long branch can reach offsets of -2048 to 2046. In the worst
26258 case each 2-byte insn is associated with a 4-byte constant pool entry,
26259 so using a function size of 2048/3 as the threshold is conservative enough.
26260 if (far_jump)
26262 if ((func_size * 3) >= 2048)
26264 /* Record the fact that we have decided that
26265 the function does use far jumps. */
26266 cfun->machine->far_jump_used = 1;
26267 return 1;
26271 return 0;
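/* Worked example of the size heuristic: with an estimated function size of
   700 bytes, 700 * 3 = 2100 >= 2048, so far jumps are assumed to survive
   branch shortening and the decision (which forces LR to be saved) is
   recorded; at 600 bytes (1800 < 2048) the pre-shortening far_jump
   attributes are ignored.  */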
26274 /* Return nonzero if FUNC must be entered in ARM mode. */
26276 is_called_in_ARM_mode (tree func)
26278 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26280 /* Ignore the problem about functions whose address is taken. */
26281 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26282 return TRUE;
26284 #ifdef ARM_PE
26285 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26286 #else
26287 return FALSE;
26288 #endif
26291 /* Given the stack offsets and register mask in OFFSETS, decide how
26292 many additional registers to push instead of subtracting a constant
26293 from SP. For epilogues the principle is the same except we use pop.
26294 FOR_PROLOGUE indicates which we're generating. */
26295 static int
26296 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26298 HOST_WIDE_INT amount;
26299 unsigned long live_regs_mask = offsets->saved_regs_mask;
26300 /* Extract a mask of the ones we can give to the Thumb's push/pop
26301 instruction. */
26302 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26303 /* Then count how many other high registers will need to be pushed. */
26304 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26305 int n_free, reg_base, size;
26307 if (!for_prologue && frame_pointer_needed)
26308 amount = offsets->locals_base - offsets->saved_regs;
26309 else
26310 amount = offsets->outgoing_args - offsets->saved_regs;
26312 /* If the stack frame size is 512 exactly, we can save one load
26313 instruction, which should make this a win even when optimizing
26314 for speed. */
26315 if (!optimize_size && amount != 512)
26316 return 0;
26318 /* Can't do this if there are high registers to push. */
26319 if (high_regs_pushed != 0)
26320 return 0;
26322 /* Shouldn't do it in the prologue if no registers would normally
26323 be pushed at all. In the epilogue, also allow it if we'll have
26324 a pop insn for the PC. */
26325 if (l_mask == 0
26326 && (for_prologue
26327 || TARGET_BACKTRACE
26328 || (live_regs_mask & 1 << LR_REGNUM) == 0
26329 || TARGET_INTERWORK
26330 || crtl->args.pretend_args_size != 0))
26331 return 0;
26333 /* Don't do this if thumb_expand_prologue wants to emit instructions
26334 between the push and the stack frame allocation. */
26335 if (for_prologue
26336 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26337 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26338 return 0;
26340 reg_base = 0;
26341 n_free = 0;
26342 if (!for_prologue)
26344 size = arm_size_return_regs ();
26345 reg_base = ARM_NUM_INTS (size);
26346 live_regs_mask >>= reg_base;
26349 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26350 && (for_prologue || call_used_regs[reg_base + n_free]))
26352 live_regs_mask >>= 1;
26353 n_free++;
26356 if (n_free == 0)
26357 return 0;
26358 gcc_assert (amount / 4 * 4 == amount);
26360 if (amount >= 512 && (amount - n_free * 4) < 512)
26361 return (amount - 508) / 4;
26362 if (amount <= n_free * 4)
26363 return amount / 4;
26364 return 0;
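/* Worked example (hypothetical frame, assuming the earlier checks in this
   function all pass, e.g. optimizing for size so AMOUNT need not be exactly
   512): AMOUNT == 516 with two free low registers.  Since 516 >= 512 and
   516 - 2 * 4 = 508 < 512, the function returns (516 - 508) / 4 = 2, so two
   extra registers are pushed and the remaining 508-byte adjustment fits a
   single immediate.  */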
26367 /* The bits which aren't usefully expanded as rtl. */
26368 const char *
26369 thumb1_unexpanded_epilogue (void)
26371 arm_stack_offsets *offsets;
26372 int regno;
26373 unsigned long live_regs_mask = 0;
26374 int high_regs_pushed = 0;
26375 int extra_pop;
26376 int had_to_push_lr;
26377 int size;
26379 if (cfun->machine->return_used_this_function != 0)
26380 return "";
26382 if (IS_NAKED (arm_current_func_type ()))
26383 return "";
26385 offsets = arm_get_frame_offsets ();
26386 live_regs_mask = offsets->saved_regs_mask;
26387 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26389 /* We can deduce the registers used from the function's return value.
26390 This is more reliable than examining df_regs_ever_live_p () because that
26391 will be set if the register is ever used in the function, not just if
26392 the register is used to hold a return value. */
26393 size = arm_size_return_regs ();
26395 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26396 if (extra_pop > 0)
26398 unsigned long extra_mask = (1 << extra_pop) - 1;
26399 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26402 /* The prologue may have pushed some high registers to use as
26403 work registers; e.g. the testsuite file:
26404 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26405 compiles to produce:
26406 push {r4, r5, r6, r7, lr}
26407 mov r7, r9
26408 mov r6, r8
26409 push {r6, r7}
26410 as part of the prologue. We have to undo that pushing here. */
26412 if (high_regs_pushed)
26414 unsigned long mask = live_regs_mask & 0xff;
26415 int next_hi_reg;
26417 /* The available low registers depend on the size of the value we are
26418 returning. */
26419 if (size <= 12)
26420 mask |= 1 << 3;
26421 if (size <= 8)
26422 mask |= 1 << 2;
26424 if (mask == 0)
26425 /* Oh dear! We have no low registers into which we can pop
26426 high registers! */
26427 internal_error
26428 ("no low registers available for popping high registers");
26430 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26431 if (live_regs_mask & (1 << next_hi_reg))
26432 break;
26434 while (high_regs_pushed)
26436 /* Find lo register(s) into which the high register(s) can
26437 be popped. */
26438 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26440 if (mask & (1 << regno))
26441 high_regs_pushed--;
26442 if (high_regs_pushed == 0)
26443 break;
26446 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26448 /* Pop the values into the low register(s). */
26449 thumb_pop (asm_out_file, mask);
26451 /* Move the value(s) into the high registers. */
26452 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26454 if (mask & (1 << regno))
26456 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26457 regno);
26459 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26460 if (live_regs_mask & (1 << next_hi_reg))
26461 break;
26465 live_regs_mask &= ~0x0f00;
26468 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26469 live_regs_mask &= 0xff;
26471 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26473 /* Pop the return address into the PC. */
26474 if (had_to_push_lr)
26475 live_regs_mask |= 1 << PC_REGNUM;
26477 /* Either no argument registers were pushed or a backtrace
26478 structure was created which includes an adjusted stack
26479 pointer, so just pop everything. */
26480 if (live_regs_mask)
26481 thumb_pop (asm_out_file, live_regs_mask);
26483 /* We have either just popped the return address into the
26484 PC or it was kept in LR for the entire function.
26485 Note that thumb_pop has already called thumb_exit if the
26486 PC was in the list. */
26487 if (!had_to_push_lr)
26488 thumb_exit (asm_out_file, LR_REGNUM);
26490 else
26492 /* Pop everything but the return address. */
26493 if (live_regs_mask)
26494 thumb_pop (asm_out_file, live_regs_mask);
26496 if (had_to_push_lr)
26498 if (size > 12)
26500 /* We have no free low regs, so save one. */
26501 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26502 LAST_ARG_REGNUM);
26505 /* Get the return address into a temporary register. */
26506 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26508 if (size > 12)
26510 /* Move the return address to lr. */
26511 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26512 LAST_ARG_REGNUM);
26513 /* Restore the low register. */
26514 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26515 IP_REGNUM);
26516 regno = LR_REGNUM;
26518 else
26519 regno = LAST_ARG_REGNUM;
26521 else
26522 regno = LR_REGNUM;
26524 /* Remove the argument registers that were pushed onto the stack. */
26525 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26526 SP_REGNUM, SP_REGNUM,
26527 crtl->args.pretend_args_size);
26529 thumb_exit (asm_out_file, regno);
26532 return "";
26535 /* Functions to save and restore machine-specific function data. */
26536 static struct machine_function *
26537 arm_init_machine_status (void)
26539 struct machine_function *machine;
26540 machine = ggc_alloc_cleared_machine_function ();
26542 #if ARM_FT_UNKNOWN != 0
26543 machine->func_type = ARM_FT_UNKNOWN;
26544 #endif
26545 return machine;
26548 /* Return an RTX indicating where the return address to the
26549 calling function can be found. */
26551 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26553 if (count != 0)
26554 return NULL_RTX;
26556 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26559 /* Do anything needed before RTL is emitted for each function. */
26560 void
26561 arm_init_expanders (void)
26563 /* Arrange to initialize and mark the machine per-function status. */
26564 init_machine_status = arm_init_machine_status;
26566 /* This is to stop the combine pass optimizing away the alignment
26567 adjustment of va_arg. */
26568 /* ??? It is claimed that this should not be necessary. */
26569 if (cfun)
26570 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26574 /* Like arm_compute_initial_elimination_offset. Simpler because there
26575 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
26576 to point at the base of the local variables after static stack
26577 space for a function has been allocated. */
26579 HOST_WIDE_INT
26580 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26582 arm_stack_offsets *offsets;
26584 offsets = arm_get_frame_offsets ();
26586 switch (from)
26588 case ARG_POINTER_REGNUM:
26589 switch (to)
26591 case STACK_POINTER_REGNUM:
26592 return offsets->outgoing_args - offsets->saved_args;
26594 case FRAME_POINTER_REGNUM:
26595 return offsets->soft_frame - offsets->saved_args;
26597 case ARM_HARD_FRAME_POINTER_REGNUM:
26598 return offsets->saved_regs - offsets->saved_args;
26600 case THUMB_HARD_FRAME_POINTER_REGNUM:
26601 return offsets->locals_base - offsets->saved_args;
26603 default:
26604 gcc_unreachable ();
26606 break;
26608 case FRAME_POINTER_REGNUM:
26609 switch (to)
26611 case STACK_POINTER_REGNUM:
26612 return offsets->outgoing_args - offsets->soft_frame;
26614 case ARM_HARD_FRAME_POINTER_REGNUM:
26615 return offsets->saved_regs - offsets->soft_frame;
26617 case THUMB_HARD_FRAME_POINTER_REGNUM:
26618 return offsets->locals_base - offsets->soft_frame;
26620 default:
26621 gcc_unreachable ();
26623 break;
26625 default:
26626 gcc_unreachable ();
26630 /* Generate the function's prologue. */
26632 void
26633 thumb1_expand_prologue (void)
26635 rtx insn;
26637 HOST_WIDE_INT amount;
26638 arm_stack_offsets *offsets;
26639 unsigned long func_type;
26640 int regno;
26641 unsigned long live_regs_mask;
26642 unsigned long l_mask;
26643 unsigned high_regs_pushed = 0;
26645 func_type = arm_current_func_type ();
26647 /* Naked functions don't have prologues. */
26648 if (IS_NAKED (func_type))
26649 return;
26651 if (IS_INTERRUPT (func_type))
26653 error ("interrupt Service Routines cannot be coded in Thumb mode");
26654 return;
26657 if (is_called_in_ARM_mode (current_function_decl))
26658 emit_insn (gen_prologue_thumb1_interwork ());
26660 offsets = arm_get_frame_offsets ();
26661 live_regs_mask = offsets->saved_regs_mask;
26663 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26664 l_mask = live_regs_mask & 0x40ff;
26665 /* Then count how many other high registers will need to be pushed. */
26666 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26668 if (crtl->args.pretend_args_size)
26670 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26672 if (cfun->machine->uses_anonymous_args)
26674 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26675 unsigned long mask;
26677 mask = 1ul << (LAST_ARG_REGNUM + 1);
26678 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26680 insn = thumb1_emit_multi_reg_push (mask, 0);
26682 else
26684 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26685 stack_pointer_rtx, x));
26687 RTX_FRAME_RELATED_P (insn) = 1;
26690 if (TARGET_BACKTRACE)
26692 HOST_WIDE_INT offset = 0;
26693 unsigned work_register;
26694 rtx work_reg, x, arm_hfp_rtx;
26696 /* We have been asked to create a stack backtrace structure.
26697 The code looks like this:
26699 0 .align 2
26700 0 func:
26701 0 sub SP, #16 Reserve space for 4 registers.
26702 2 push {R7} Push low registers.
26703 4 add R7, SP, #20 Get the stack pointer before the push.
26704 6 str R7, [SP, #8] Store the stack pointer
26705 (before reserving the space).
26706 8 mov R7, PC Get hold of the start of this code + 12.
26707 10 str R7, [SP, #16] Store it.
26708 12 mov R7, FP Get hold of the current frame pointer.
26709 14 str R7, [SP, #4] Store it.
26710 16 mov R7, LR Get hold of the current return address.
26711 18 str R7, [SP, #12] Store it.
26712 20 add R7, SP, #16 Point at the start of the
26713 backtrace structure.
26714 22 mov FP, R7 Put this value into the frame pointer. */
26716 work_register = thumb_find_work_register (live_regs_mask);
26717 work_reg = gen_rtx_REG (SImode, work_register);
26718 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26720 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26721 stack_pointer_rtx, GEN_INT (-16)));
26722 RTX_FRAME_RELATED_P (insn) = 1;
26724 if (l_mask)
26726 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26727 RTX_FRAME_RELATED_P (insn) = 1;
26729 offset = bit_count (l_mask) * UNITS_PER_WORD;
26732 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26733 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26735 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26736 x = gen_frame_mem (SImode, x);
26737 emit_move_insn (x, work_reg);
26739 /* Make sure that the instruction fetching the PC is in the right place
26740 to calculate "start of backtrace creation code + 12". */
26741 /* ??? The stores using the common WORK_REG ought to be enough to
26742 prevent the scheduler from doing anything weird. Failing that
26743 we could always move all of the following into an UNSPEC_VOLATILE. */
26744 if (l_mask)
26746 x = gen_rtx_REG (SImode, PC_REGNUM);
26747 emit_move_insn (work_reg, x);
26749 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26750 x = gen_frame_mem (SImode, x);
26751 emit_move_insn (x, work_reg);
26753 emit_move_insn (work_reg, arm_hfp_rtx);
26755 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26756 x = gen_frame_mem (SImode, x);
26757 emit_move_insn (x, work_reg);
26759 else
26761 emit_move_insn (work_reg, arm_hfp_rtx);
26763 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26764 x = gen_frame_mem (SImode, x);
26765 emit_move_insn (x, work_reg);
26767 x = gen_rtx_REG (SImode, PC_REGNUM);
26768 emit_move_insn (work_reg, x);
26770 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26771 x = gen_frame_mem (SImode, x);
26772 emit_move_insn (x, work_reg);
26775 x = gen_rtx_REG (SImode, LR_REGNUM);
26776 emit_move_insn (work_reg, x);
26778 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26779 x = gen_frame_mem (SImode, x);
26780 emit_move_insn (x, work_reg);
26782 x = GEN_INT (offset + 12);
26783 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26785 emit_move_insn (arm_hfp_rtx, work_reg);
26787 /* Optimization: If we are not pushing any low registers but we are going
26788 to push some high registers then delay our first push. This will just
26789 be a push of LR and we can combine it with the push of the first high
26790 register. */
26791 else if ((l_mask & 0xff) != 0
26792 || (high_regs_pushed == 0 && l_mask))
26794 unsigned long mask = l_mask;
26795 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
26796 insn = thumb1_emit_multi_reg_push (mask, mask);
26797 RTX_FRAME_RELATED_P (insn) = 1;
26800 if (high_regs_pushed)
26802 unsigned pushable_regs;
26803 unsigned next_hi_reg;
26804 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26805 : crtl->args.info.nregs;
26806 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26808 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26809 if (live_regs_mask & (1 << next_hi_reg))
26810 break;
26812 /* Here we need to mask out registers used for passing arguments
26813 even if they can be pushed. This is to avoid using them to stash the high
26814 registers, since doing so would clobber the incoming argument values. */
26815 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
26817 if (pushable_regs == 0)
26818 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26820 while (high_regs_pushed > 0)
26822 unsigned long real_regs_mask = 0;
26824 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
26826 if (pushable_regs & (1 << regno))
26828 emit_move_insn (gen_rtx_REG (SImode, regno),
26829 gen_rtx_REG (SImode, next_hi_reg));
26831 high_regs_pushed --;
26832 real_regs_mask |= (1 << next_hi_reg);
26834 if (high_regs_pushed)
26836 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26837 next_hi_reg --)
26838 if (live_regs_mask & (1 << next_hi_reg))
26839 break;
26841 else
26843 pushable_regs &= ~((1 << regno) - 1);
26844 break;
26849 /* If we had to find a work register and we have not yet
26850 saved the LR then add it to the list of regs to push. */
26851 if (l_mask == (1 << LR_REGNUM))
26853 pushable_regs |= l_mask;
26854 real_regs_mask |= l_mask;
26855 l_mask = 0;
26858 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
26859 RTX_FRAME_RELATED_P (insn) = 1;
26863 /* Load the pic register before setting the frame pointer,
26864 so we can use r7 as a temporary work register. */
26865 if (flag_pic && arm_pic_register != INVALID_REGNUM)
26866 arm_load_pic_register (live_regs_mask);
26868 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26869 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26870 stack_pointer_rtx);
26872 if (flag_stack_usage_info)
26873 current_function_static_stack_size
26874 = offsets->outgoing_args - offsets->saved_args;
26876 amount = offsets->outgoing_args - offsets->saved_regs;
26877 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26878 if (amount)
26880 if (amount < 512)
26882 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26883 GEN_INT (- amount)));
26884 RTX_FRAME_RELATED_P (insn) = 1;
26886 else
26888 rtx reg, dwarf;
26890 /* The stack decrement is too big for an immediate value in a single
26891 insn. In theory we could issue multiple subtracts, but after
26892 three of them it becomes more space efficient to place the full
26893 value in the constant pool and load into a register. (Also the
26894 ARM debugger really likes to see only one stack decrement per
26895 function). So instead we look for a scratch register into which
26896 we can load the decrement, and then we subtract this from the
26897 stack pointer. Unfortunately on the thumb the only available
26898 scratch registers are the argument registers, and we cannot use
26899 these as they may hold arguments to the function. Instead we
26900 attempt to locate a call preserved register which is used by this
26901 function. If we can find one, then we know that it will have
26902 been pushed at the start of the prologue and so we can corrupt
26903 it now. */
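/* For example, with a 1 KB local frame and r4 live across the function,
   the code below would load -1024 into r4 and add it to SP (roughly
   "ldr r4, =-1024" followed by "add sp, sp, r4"); this is only an
   illustration, the exact sequence depends on the patterns selected.  */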
26904 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26905 if (live_regs_mask & (1 << regno))
26906 break;
26908 gcc_assert(regno <= LAST_LO_REGNUM);
26910 reg = gen_rtx_REG (SImode, regno);
26912 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26914 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26915 stack_pointer_rtx, reg));
26917 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26918 plus_constant (Pmode, stack_pointer_rtx,
26919 -amount));
26920 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26921 RTX_FRAME_RELATED_P (insn) = 1;
26925 if (frame_pointer_needed)
26926 thumb_set_frame_pointer (offsets);
26928 /* If we are profiling, make sure no instructions are scheduled before
26929 the call to mcount. Similarly if the user has requested no
26930 scheduling in the prolog. Similarly if we want non-call exceptions
26931 using the EABI unwinder, to prevent faulting instructions from being
26932 swapped with a stack adjustment. */
26933 if (crtl->profile || !TARGET_SCHED_PROLOG
26934 || (arm_except_unwind_info (&global_options) == UI_TARGET
26935 && cfun->can_throw_non_call_exceptions))
26936 emit_insn (gen_blockage ());
26938 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26939 if (live_regs_mask & 0xff)
26940 cfun->machine->lr_save_eliminated = 0;
26943 /* Generate the pattern *pop_multiple_with_stack_update_and_return when a
26944 single POP instruction can be generated.  LR should be replaced by PC.  All
26945 the checks required are already done by USE_RETURN_INSN ().  Hence, all
26946 we really need to check here is whether a single register or multiple
26947 registers are to be returned. */
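/* When only one register was saved this typically collapses to a single
   post-incremented load of PC (roughly "ldr pc, [sp], #4"); otherwise LR
   is simply replaced by PC in the multi-register pop mask below.  */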
26948 void
26949 thumb2_expand_return (bool simple_return)
26951 int i, num_regs;
26952 unsigned long saved_regs_mask;
26953 arm_stack_offsets *offsets;
26955 offsets = arm_get_frame_offsets ();
26956 saved_regs_mask = offsets->saved_regs_mask;
26958 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
26959 if (saved_regs_mask & (1 << i))
26960 num_regs++;
26962 if (!simple_return && saved_regs_mask)
26964 if (num_regs == 1)
26966 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26967 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
26968 rtx addr = gen_rtx_MEM (SImode,
26969 gen_rtx_POST_INC (SImode,
26970 stack_pointer_rtx));
26971 set_mem_alias_set (addr, get_frame_alias_set ());
26972 XVECEXP (par, 0, 0) = ret_rtx;
26973 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
26974 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
26975 emit_jump_insn (par);
26977 else
26979 saved_regs_mask &= ~ (1 << LR_REGNUM);
26980 saved_regs_mask |= (1 << PC_REGNUM);
26981 arm_emit_multi_reg_pop (saved_regs_mask);
26984 else
26986 emit_jump_insn (simple_return_rtx);
26990 void
26991 thumb1_expand_epilogue (void)
26993 HOST_WIDE_INT amount;
26994 arm_stack_offsets *offsets;
26995 int regno;
26997 /* Naked functions don't have prologues. */
26998 if (IS_NAKED (arm_current_func_type ()))
26999 return;
27001 offsets = arm_get_frame_offsets ();
27002 amount = offsets->outgoing_args - offsets->saved_regs;
27004 if (frame_pointer_needed)
27006 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27007 amount = offsets->locals_base - offsets->saved_regs;
27009 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27011 gcc_assert (amount >= 0);
27012 if (amount)
27014 emit_insn (gen_blockage ());
27016 if (amount < 512)
27017 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27018 GEN_INT (amount)));
27019 else
27021 /* r3 is always free in the epilogue. */
27022 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27024 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27025 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27029 /* Emit a USE (stack_pointer_rtx), so that
27030 the stack adjustment will not be deleted. */
27031 emit_insn (gen_force_register_use (stack_pointer_rtx));
27033 if (crtl->profile || !TARGET_SCHED_PROLOG)
27034 emit_insn (gen_blockage ());
27036 /* Emit a clobber for each insn that will be restored in the epilogue,
27037 so that flow2 will get register lifetimes correct. */
27038 for (regno = 0; regno < 13; regno++)
27039 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27040 emit_clobber (gen_rtx_REG (SImode, regno));
27042 if (! df_regs_ever_live_p (LR_REGNUM))
27043 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27046 /* Epilogue code for APCS frame. */
27047 static void
27048 arm_expand_epilogue_apcs_frame (bool really_return)
27050 unsigned long func_type;
27051 unsigned long saved_regs_mask;
27052 int num_regs = 0;
27053 int i;
27054 int floats_from_frame = 0;
27055 arm_stack_offsets *offsets;
27057 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27058 func_type = arm_current_func_type ();
27060 /* Get frame offsets for ARM. */
27061 offsets = arm_get_frame_offsets ();
27062 saved_regs_mask = offsets->saved_regs_mask;
27064 /* Find the offset of the floating-point save area in the frame. */
27065 floats_from_frame
27066 = (offsets->saved_args
27067 + arm_compute_static_chain_stack_bytes ()
27068 - offsets->frame);
27070 /* Compute how many core registers saved and how far away the floats are. */
27071 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27072 if (saved_regs_mask & (1 << i))
27074 num_regs++;
27075 floats_from_frame += 4;
27078 if (TARGET_HARD_FLOAT && TARGET_VFP)
27080 int start_reg;
27082 /* The offset is from IP_REGNUM. */
27083 int saved_size = arm_get_vfp_saved_size ();
27084 if (saved_size > 0)
27086 floats_from_frame += saved_size;
27087 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
27088 hard_frame_pointer_rtx,
27089 GEN_INT (-floats_from_frame)));
27092 /* Generate VFP register multi-pop. */
27093 start_reg = FIRST_VFP_REGNUM;
27095 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27096 /* Look for a case where a reg does not need restoring. */
27097 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27098 && (!df_regs_ever_live_p (i + 1)
27099 || call_used_regs[i + 1]))
27101 if (start_reg != i)
27102 arm_emit_vfp_multi_reg_pop (start_reg,
27103 (i - start_reg) / 2,
27104 gen_rtx_REG (SImode,
27105 IP_REGNUM));
27106 start_reg = i + 2;
27109 /* Restore the remaining regs that we have discovered (or possibly
27110 even all of them, if the conditional in the for loop never
27111 fired). */
27112 if (start_reg != i)
27113 arm_emit_vfp_multi_reg_pop (start_reg,
27114 (i - start_reg) / 2,
27115 gen_rtx_REG (SImode, IP_REGNUM));
27118 if (TARGET_IWMMXT)
27120 /* The frame pointer is guaranteed to be non-double-word aligned, as
27121 it is set to double-word-aligned old_stack_pointer - 4. */
27122 rtx insn;
27123 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27125 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27126 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27128 rtx addr = gen_frame_mem (V2SImode,
27129 plus_constant (Pmode, hard_frame_pointer_rtx,
27130 - lrm_count * 4));
27131 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27132 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27133 gen_rtx_REG (V2SImode, i),
27134 NULL_RTX);
27135 lrm_count += 2;
27139 /* saved_regs_mask should contain IP, which holds the old stack pointer
27140 from the time the frame was created.  Since SP and IP are adjacent registers,
27141 we can restore the value directly into SP. */
27142 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27143 saved_regs_mask &= ~(1 << IP_REGNUM);
27144 saved_regs_mask |= (1 << SP_REGNUM);
27146 /* There are two registers left in saved_regs_mask - LR and PC. We
27147 only need to restore LR (the return address), but to
27148 save time we can load it directly into PC, unless we need a
27149 special function exit sequence, or we are not really returning. */
27150 if (really_return
27151 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27152 && !crtl->calls_eh_return)
27153 /* Delete LR from the register mask, so that LR on
27154 the stack is loaded into the PC in the register mask. */
27155 saved_regs_mask &= ~(1 << LR_REGNUM);
27156 else
27157 saved_regs_mask &= ~(1 << PC_REGNUM);
27159 num_regs = bit_count (saved_regs_mask);
27160 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27162 emit_insn (gen_blockage ());
27163 /* Unwind the stack to just below the saved registers. */
27164 emit_insn (gen_addsi3 (stack_pointer_rtx,
27165 hard_frame_pointer_rtx,
27166 GEN_INT (- 4 * num_regs)));
27169 arm_emit_multi_reg_pop (saved_regs_mask);
27171 if (IS_INTERRUPT (func_type))
27173 /* Interrupt handlers will have pushed the
27174 IP onto the stack, so restore it now. */
27175 rtx insn;
27176 rtx addr = gen_rtx_MEM (SImode,
27177 gen_rtx_POST_INC (SImode,
27178 stack_pointer_rtx));
27179 set_mem_alias_set (addr, get_frame_alias_set ());
27180 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27181 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27182 gen_rtx_REG (SImode, IP_REGNUM),
27183 NULL_RTX);
27186 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27187 return;
27189 if (crtl->calls_eh_return)
27190 emit_insn (gen_addsi3 (stack_pointer_rtx,
27191 stack_pointer_rtx,
27192 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27194 if (IS_STACKALIGN (func_type))
27195 /* Restore the original stack pointer. Before prologue, the stack was
27196 realigned and the original stack pointer saved in r0. For details,
27197 see comment in arm_expand_prologue. */
27198 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27200 emit_jump_insn (simple_return_rtx);
27203 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27204 function is not a sibcall. */
27205 void
27206 arm_expand_epilogue (bool really_return)
27208 unsigned long func_type;
27209 unsigned long saved_regs_mask;
27210 int num_regs = 0;
27211 int i;
27212 int amount;
27213 arm_stack_offsets *offsets;
27215 func_type = arm_current_func_type ();
27217 /* Naked functions don't have an epilogue.  Hence, generate a return pattern
27218 and let output_return_instruction take care of any instruction emission. */
27219 if (IS_NAKED (func_type)
27220 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27222 if (really_return)
27223 emit_jump_insn (simple_return_rtx);
27224 return;
27227 /* If we are throwing an exception, then we really must be doing a
27228 return, so we can't tail-call. */
27229 gcc_assert (!crtl->calls_eh_return || really_return);
27231 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27233 arm_expand_epilogue_apcs_frame (really_return);
27234 return;
27237 /* Get frame offsets for ARM. */
27238 offsets = arm_get_frame_offsets ();
27239 saved_regs_mask = offsets->saved_regs_mask;
27240 num_regs = bit_count (saved_regs_mask);
27242 if (frame_pointer_needed)
27244 rtx insn;
27245 /* Restore stack pointer if necessary. */
27246 if (TARGET_ARM)
27248 /* In ARM mode, frame pointer points to first saved register.
27249 Restore stack pointer to last saved register. */
27250 amount = offsets->frame - offsets->saved_regs;
27252 /* Force out any pending memory operations that reference stacked data
27253 before stack de-allocation occurs. */
27254 emit_insn (gen_blockage ());
27255 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27256 hard_frame_pointer_rtx,
27257 GEN_INT (amount)));
27258 arm_add_cfa_adjust_cfa_note (insn, amount,
27259 stack_pointer_rtx,
27260 hard_frame_pointer_rtx);
27262 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27263 deleted. */
27264 emit_insn (gen_force_register_use (stack_pointer_rtx));
27266 else
27268 /* In Thumb-2 mode, the frame pointer points to the last saved
27269 register. */
27270 amount = offsets->locals_base - offsets->saved_regs;
27271 if (amount)
27273 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27274 hard_frame_pointer_rtx,
27275 GEN_INT (amount)));
27276 arm_add_cfa_adjust_cfa_note (insn, amount,
27277 hard_frame_pointer_rtx,
27278 hard_frame_pointer_rtx);
27281 /* Force out any pending memory operations that reference stacked data
27282 before stack de-allocation occurs. */
27283 emit_insn (gen_blockage ());
27284 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27285 hard_frame_pointer_rtx));
27286 arm_add_cfa_adjust_cfa_note (insn, 0,
27287 stack_pointer_rtx,
27288 hard_frame_pointer_rtx);
27289 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27290 deleted. */
27291 emit_insn (gen_force_register_use (stack_pointer_rtx));
27294 else
27296 /* Pop off outgoing args and local frame to adjust stack pointer to
27297 last saved register. */
27298 amount = offsets->outgoing_args - offsets->saved_regs;
27299 if (amount)
27301 rtx tmp;
27302 /* Force out any pending memory operations that reference stacked data
27303 before stack de-allocation occurs. */
27304 emit_insn (gen_blockage ());
27305 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27306 stack_pointer_rtx,
27307 GEN_INT (amount)));
27308 arm_add_cfa_adjust_cfa_note (tmp, amount,
27309 stack_pointer_rtx, stack_pointer_rtx);
27310 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27311 not deleted. */
27312 emit_insn (gen_force_register_use (stack_pointer_rtx));
27316 if (TARGET_HARD_FLOAT && TARGET_VFP)
27318 /* Generate VFP register multi-pop. */
27319 int end_reg = LAST_VFP_REGNUM + 1;
27321 /* Scan the registers in reverse order. We need to match
27322 any groupings made in the prologue and generate matching
27323 vldm operations. The need to match groups is because,
27324 unlike pop, vldm can only do consecutive regs. */
27325 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27326 /* Look for a case where a reg does not need restoring. */
27327 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27328 && (!df_regs_ever_live_p (i + 1)
27329 || call_used_regs[i + 1]))
27331 /* Restore the regs discovered so far (from reg+2 to
27332 end_reg). */
27333 if (end_reg > i + 2)
27334 arm_emit_vfp_multi_reg_pop (i + 2,
27335 (end_reg - (i + 2)) / 2,
27336 stack_pointer_rtx);
27337 end_reg = i;
27340 /* Restore the remaining regs that we have discovered (or possibly
27341 even all of them, if the conditional in the for loop never
27342 fired). */
27343 if (end_reg > i + 2)
27344 arm_emit_vfp_multi_reg_pop (i + 2,
27345 (end_reg - (i + 2)) / 2,
27346 stack_pointer_rtx);
27349 if (TARGET_IWMMXT)
27350 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27351 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27353 rtx insn;
27354 rtx addr = gen_rtx_MEM (V2SImode,
27355 gen_rtx_POST_INC (SImode,
27356 stack_pointer_rtx));
27357 set_mem_alias_set (addr, get_frame_alias_set ());
27358 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27359 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27360 gen_rtx_REG (V2SImode, i),
27361 NULL_RTX);
27362 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27363 stack_pointer_rtx, stack_pointer_rtx);
27366 if (saved_regs_mask)
27368 rtx insn;
27369 bool return_in_pc = false;
27371 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27372 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27373 && !IS_STACKALIGN (func_type)
27374 && really_return
27375 && crtl->args.pretend_args_size == 0
27376 && saved_regs_mask & (1 << LR_REGNUM)
27377 && !crtl->calls_eh_return)
27379 saved_regs_mask &= ~(1 << LR_REGNUM);
27380 saved_regs_mask |= (1 << PC_REGNUM);
27381 return_in_pc = true;
27384 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27386 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27387 if (saved_regs_mask & (1 << i))
27389 rtx addr = gen_rtx_MEM (SImode,
27390 gen_rtx_POST_INC (SImode,
27391 stack_pointer_rtx));
27392 set_mem_alias_set (addr, get_frame_alias_set ());
27394 if (i == PC_REGNUM)
27396 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27397 XVECEXP (insn, 0, 0) = ret_rtx;
27398 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27399 gen_rtx_REG (SImode, i),
27400 addr);
27401 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27402 insn = emit_jump_insn (insn);
27404 else
27406 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27407 addr));
27408 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27409 gen_rtx_REG (SImode, i),
27410 NULL_RTX);
27411 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27412 stack_pointer_rtx,
27413 stack_pointer_rtx);
27417 else
27419 if (TARGET_LDRD
27420 && current_tune->prefer_ldrd_strd
27421 && !optimize_function_for_size_p (cfun))
27423 if (TARGET_THUMB2)
27424 thumb2_emit_ldrd_pop (saved_regs_mask);
27425 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27426 arm_emit_ldrd_pop (saved_regs_mask);
27427 else
27428 arm_emit_multi_reg_pop (saved_regs_mask);
27430 else
27431 arm_emit_multi_reg_pop (saved_regs_mask);
27434 if (return_in_pc == true)
27435 return;
27438 if (crtl->args.pretend_args_size)
27440 int i, j;
27441 rtx dwarf = NULL_RTX;
27442 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27443 stack_pointer_rtx,
27444 GEN_INT (crtl->args.pretend_args_size)));
27446 RTX_FRAME_RELATED_P (tmp) = 1;
27448 if (cfun->machine->uses_anonymous_args)
27450 /* Restore pretend args.  See arm_expand_prologue for how the pretend
27451 args are saved on the stack. */
27452 int num_regs = crtl->args.pretend_args_size / 4;
27453 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
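/* For example, pretend_args_size == 8 gives num_regs == 2 and a mask of
   0x0c, i.e. r2 and r3, the two argument registers spilled by the
   prologue in that case.  */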
27454 for (j = 0, i = 0; j < num_regs; i++)
27455 if (saved_regs_mask & (1 << i))
27457 rtx reg = gen_rtx_REG (SImode, i);
27458 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27459 j++;
27461 REG_NOTES (tmp) = dwarf;
27463 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27464 stack_pointer_rtx, stack_pointer_rtx);
27467 if (!really_return)
27468 return;
27470 if (crtl->calls_eh_return)
27471 emit_insn (gen_addsi3 (stack_pointer_rtx,
27472 stack_pointer_rtx,
27473 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27475 if (IS_STACKALIGN (func_type))
27476 /* Restore the original stack pointer. Before prologue, the stack was
27477 realigned and the original stack pointer saved in r0. For details,
27478 see comment in arm_expand_prologue. */
27479 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27481 emit_jump_insn (simple_return_rtx);
27484 /* Implementation of insn prologue_thumb1_interwork. This is the first
27485 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27487 const char *
27488 thumb1_output_interwork (void)
27490 const char * name;
27491 FILE *f = asm_out_file;
27493 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27494 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27495 == SYMBOL_REF);
27496 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27498 /* Generate code sequence to switch us into Thumb mode. */
27499 /* The .code 32 directive has already been emitted by
27500 ASM_DECLARE_FUNCTION_NAME. */
27501 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27502 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27504 /* Generate a label, so that the debugger will notice the
27505 change in instruction sets. This label is also used by
27506 the assembler to bypass the ARM code when this function
27507 is called from a Thumb encoded function elsewhere in the
27508 same file. Hence the definition of STUB_NAME here must
27509 agree with the definition in gas/config/tc-arm.c. */
27511 #define STUB_NAME ".real_start_of"
27513 fprintf (f, "\t.code\t16\n");
27514 #ifdef ARM_PE
27515 if (arm_dllexport_name_p (name))
27516 name = arm_strip_name_encoding (name);
27517 #endif
27518 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27519 fprintf (f, "\t.thumb_func\n");
27520 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27522 return "";
27525 /* Handle the case of a double word load into a low register from
27526 a computed memory address. The computed address may involve a
27527 register which is overwritten by the load. */
27528 const char *
27529 thumb_load_double_from_address (rtx *operands)
27531 rtx addr;
27532 rtx base;
27533 rtx offset;
27534 rtx arg1;
27535 rtx arg2;
27537 gcc_assert (REG_P (operands[0]));
27538 gcc_assert (MEM_P (operands[1]));
27540 /* Get the memory address. */
27541 addr = XEXP (operands[1], 0);
27543 /* Work out how the memory address is computed. */
27544 switch (GET_CODE (addr))
27546 case REG:
27547 operands[2] = adjust_address (operands[1], SImode, 4);
27549 if (REGNO (operands[0]) == REGNO (addr))
27551 output_asm_insn ("ldr\t%H0, %2", operands);
27552 output_asm_insn ("ldr\t%0, %1", operands);
27554 else
27556 output_asm_insn ("ldr\t%0, %1", operands);
27557 output_asm_insn ("ldr\t%H0, %2", operands);
27559 break;
27561 case CONST:
27562 /* Compute <address> + 4 for the high order load. */
27563 operands[2] = adjust_address (operands[1], SImode, 4);
27565 output_asm_insn ("ldr\t%0, %1", operands);
27566 output_asm_insn ("ldr\t%H0, %2", operands);
27567 break;
27569 case PLUS:
27570 arg1 = XEXP (addr, 0);
27571 arg2 = XEXP (addr, 1);
27573 if (CONSTANT_P (arg1))
27574 base = arg2, offset = arg1;
27575 else
27576 base = arg1, offset = arg2;
27578 gcc_assert (REG_P (base));
27580 /* Catch the case of <address> = <reg> + <reg> */
27581 if (REG_P (offset))
27583 int reg_offset = REGNO (offset);
27584 int reg_base = REGNO (base);
27585 int reg_dest = REGNO (operands[0]);
27587 /* Add the base and offset registers together into the
27588 higher destination register. */
27589 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27590 reg_dest + 1, reg_base, reg_offset);
27592 /* Load the lower destination register from the address in
27593 the higher destination register. */
27594 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27595 reg_dest, reg_dest + 1);
27597 /* Load the higher destination register from its own address
27598 plus 4. */
27599 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27600 reg_dest + 1, reg_dest + 1);
27602 else
27604 /* Compute <address> + 4 for the high order load. */
27605 operands[2] = adjust_address (operands[1], SImode, 4);
27607 /* If the computed address is held in the low order register
27608 then load the high order register first, otherwise always
27609 load the low order register first. */
27610 if (REGNO (operands[0]) == REGNO (base))
27612 output_asm_insn ("ldr\t%H0, %2", operands);
27613 output_asm_insn ("ldr\t%0, %1", operands);
27615 else
27617 output_asm_insn ("ldr\t%0, %1", operands);
27618 output_asm_insn ("ldr\t%H0, %2", operands);
27621 break;
27623 case LABEL_REF:
27624 /* With no registers to worry about we can just load the value
27625 directly. */
27626 operands[2] = adjust_address (operands[1], SImode, 4);
27628 output_asm_insn ("ldr\t%H0, %2", operands);
27629 output_asm_insn ("ldr\t%0, %1", operands);
27630 break;
27632 default:
27633 gcc_unreachable ();
27636 return "";
27639 const char *
27640 thumb_output_move_mem_multiple (int n, rtx *operands)
27642 rtx tmp;
27644 switch (n)
27646 case 2:
27647 if (REGNO (operands[4]) > REGNO (operands[5]))
27649 tmp = operands[4];
27650 operands[4] = operands[5];
27651 operands[5] = tmp;
27653 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27654 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27655 break;
27657 case 3:
27658 if (REGNO (operands[4]) > REGNO (operands[5]))
27660 tmp = operands[4];
27661 operands[4] = operands[5];
27662 operands[5] = tmp;
27664 if (REGNO (operands[5]) > REGNO (operands[6]))
27666 tmp = operands[5];
27667 operands[5] = operands[6];
27668 operands[6] = tmp;
27670 if (REGNO (operands[4]) > REGNO (operands[5]))
27672 tmp = operands[4];
27673 operands[4] = operands[5];
27674 operands[5] = tmp;
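/* The three conditional exchanges above sort operands[4..6] into ascending
   register order, so the ldmia/stmia register lists below are emitted in
   canonical (ascending) form.  */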
27677 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27678 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27679 break;
27681 default:
27682 gcc_unreachable ();
27685 return "";
27688 /* Output a call-via instruction for thumb state. */
27689 const char *
27690 thumb_call_via_reg (rtx reg)
27692 int regno = REGNO (reg);
27693 rtx *labelp;
27695 gcc_assert (regno < LR_REGNUM);
27697 /* If we are in the normal text section we can use a single instance
27698 per compilation unit. If we are doing function sections, then we need
27699 an entry per section, since we can't rely on reachability. */
27700 if (in_section == text_section)
27702 thumb_call_reg_needed = 1;
27704 if (thumb_call_via_label[regno] == NULL)
27705 thumb_call_via_label[regno] = gen_label_rtx ();
27706 labelp = thumb_call_via_label + regno;
27708 else
27710 if (cfun->machine->call_via[regno] == NULL)
27711 cfun->machine->call_via[regno] = gen_label_rtx ();
27712 labelp = cfun->machine->call_via + regno;
27715 output_asm_insn ("bl\t%a0", labelp);
27716 return "";
27719 /* Routines for generating rtl. */
27720 void
27721 thumb_expand_movmemqi (rtx *operands)
27723 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27724 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27725 HOST_WIDE_INT len = INTVAL (operands[2]);
27726 HOST_WIDE_INT offset = 0;
27728 while (len >= 12)
27730 emit_insn (gen_movmem12b (out, in, out, in));
27731 len -= 12;
27734 if (len >= 8)
27736 emit_insn (gen_movmem8b (out, in, out, in));
27737 len -= 8;
27740 if (len >= 4)
27742 rtx reg = gen_reg_rtx (SImode);
27743 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27744 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27745 len -= 4;
27746 offset += 4;
27749 if (len >= 2)
27751 rtx reg = gen_reg_rtx (HImode);
27752 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27753 plus_constant (Pmode, in,
27754 offset))));
27755 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27756 offset)),
27757 reg));
27758 len -= 2;
27759 offset += 2;
27762 if (len)
27764 rtx reg = gen_reg_rtx (QImode);
27765 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27766 plus_constant (Pmode, in,
27767 offset))));
27768 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27769 offset)),
27770 reg));
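/* A 23-byte copy, for instance, is expanded as one 12-byte block move,
   one 8-byte block move, then a halfword and a byte copy for the tail.  */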
27774 void
27775 thumb_reload_out_hi (rtx *operands)
27777 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27780 /* Handle reading a half-word from memory during reload. */
27781 void
27782 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
27784 gcc_unreachable ();
27787 /* Return the length of a function name prefix
27788 that starts with the character 'c'. */
27789 static int
27790 arm_get_strip_length (int c)
27792 switch (c)
27794 ARM_NAME_ENCODING_LENGTHS
27795 default: return 0;
27799 /* Return a pointer to a function's name with any
27800 and all prefix encodings stripped from it. */
27801 const char *
27802 arm_strip_name_encoding (const char *name)
27804 int skip;
27806 while ((skip = arm_get_strip_length (* name)))
27807 name += skip;
27809 return name;
27812 /* If there is a '*' anywhere in the name's prefix, then
27813 emit the stripped name verbatim, otherwise prepend an
27814 underscore if leading underscores are being used. */
27815 void
27816 arm_asm_output_labelref (FILE *stream, const char *name)
27818 int skip;
27819 int verbatim = 0;
27821 while ((skip = arm_get_strip_length (* name)))
27823 verbatim |= (*name == '*');
27824 name += skip;
27827 if (verbatim)
27828 fputs (name, stream);
27829 else
27830 asm_fprintf (stream, "%U%s", name);
27833 /* This function is used to emit an EABI tag and its associated value.
27834 We emit the numerical value of the tag in case the assembler does not
27835 support textual tags (e.g. gas prior to 2.20).  If requested we include
27836 the tag name in a comment so that anyone reading the assembler output
27837 will know which tag is being set.
27839 This function is not static because arm-c.c needs it too. */
27841 void
27842 arm_emit_eabi_attribute (const char *name, int num, int val)
27844 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
27845 if (flag_verbose_asm || flag_debug_asm)
27846 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
27847 asm_fprintf (asm_out_file, "\n");
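/* For instance, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 2)
   prints "\t.eabi_attribute 26, 2", followed by the tag name as an
   assembler comment when -fverbose-asm or -dA is in effect.  */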
27850 static void
27851 arm_file_start (void)
27853 int val;
27855 if (TARGET_UNIFIED_ASM)
27856 asm_fprintf (asm_out_file, "\t.syntax unified\n");
27858 if (TARGET_BPABI)
27860 const char *fpu_name;
27861 if (arm_selected_arch)
27863 const char* pos = strchr (arm_selected_arch->name, '+');
27864 if (pos)
27866 char buf[15];
27867 gcc_assert (strlen (arm_selected_arch->name)
27868 <= sizeof (buf) / sizeof (*pos));
27869 strncpy (buf, arm_selected_arch->name,
27870 (pos - arm_selected_arch->name) * sizeof (*pos));
27871 buf[pos - arm_selected_arch->name] = '\0';
27872 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
27873 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
27875 else
27876 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
27878 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
27879 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
27880 else
27882 const char* truncated_name
27883 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
27884 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
27887 if (TARGET_SOFT_FLOAT)
27889 fpu_name = "softvfp";
27891 else
27893 fpu_name = arm_fpu_desc->name;
27894 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
27896 if (TARGET_HARD_FLOAT)
27897 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
27898 if (TARGET_HARD_FLOAT_ABI)
27899 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27902 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
27904 /* Some of these attributes only apply when the corresponding features
27905 are used. However we don't have any easy way of figuring this out.
27906 Conservatively record the setting that would have been used. */
27908 if (flag_rounding_math)
27909 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27911 if (!flag_unsafe_math_optimizations)
27913 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27914 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27916 if (flag_signaling_nans)
27917 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27919 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27920 flag_finite_math_only ? 1 : 3);
27922 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27923 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27924 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27925 flag_short_enums ? 1 : 2);
27927 /* Tag_ABI_optimization_goals. */
27928 if (optimize_size)
27929 val = 4;
27930 else if (optimize >= 2)
27931 val = 2;
27932 else if (optimize)
27933 val = 1;
27934 else
27935 val = 6;
27936 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
27938 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
27939 unaligned_access);
27941 if (arm_fp16_format)
27942 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
27943 (int) arm_fp16_format);
27945 if (arm_lang_output_object_attributes_hook)
27946 arm_lang_output_object_attributes_hook();
27949 default_file_start ();
27952 static void
27953 arm_file_end (void)
27955 int regno;
27957 if (NEED_INDICATE_EXEC_STACK)
27958 /* Add .note.GNU-stack. */
27959 file_end_indicate_exec_stack ();
27961 if (! thumb_call_reg_needed)
27962 return;
27964 switch_to_section (text_section);
27965 asm_fprintf (asm_out_file, "\t.code 16\n");
27966 ASM_OUTPUT_ALIGN (asm_out_file, 1);
27968 for (regno = 0; regno < LR_REGNUM; regno++)
27970 rtx label = thumb_call_via_label[regno];
27972 if (label != 0)
27974 targetm.asm_out.internal_label (asm_out_file, "L",
27975 CODE_LABEL_NUMBER (label));
27976 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
27981 #ifndef ARM_PE
27982 /* Symbols in the text segment can be accessed without indirecting via the
27983 constant pool; it may take an extra binary operation, but this is still
27984 faster than indirecting via memory. Don't do this when not optimizing,
27985 since we won't be calculating all of the offsets necessary to do this
27986 simplification. */
27988 static void
27989 arm_encode_section_info (tree decl, rtx rtl, int first)
27991 if (optimize > 0 && TREE_CONSTANT (decl))
27992 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
27994 default_encode_section_info (decl, rtl, first);
27996 #endif /* !ARM_PE */
27998 static void
27999 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28001 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28002 && !strcmp (prefix, "L"))
28004 arm_ccfsm_state = 0;
28005 arm_target_insn = NULL;
28007 default_internal_label (stream, prefix, labelno);
28010 /* Output code to add DELTA to the first argument, and then jump
28011 to FUNCTION. Used for C++ multiple inheritance. */
28012 static void
28013 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28014 HOST_WIDE_INT delta,
28015 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28016 tree function)
28018 static int thunk_label = 0;
28019 char label[256];
28020 char labelpc[256];
28021 int mi_delta = delta;
28022 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28023 int shift = 0;
28024 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28025 ? 1 : 0);
28026 if (mi_delta < 0)
28027 mi_delta = - mi_delta;
28029 final_start_function (emit_barrier (), file, 1);
28031 if (TARGET_THUMB1)
28033 int labelno = thunk_label++;
28034 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28035 /* Thunks are entered in ARM mode when available. */
28036 if (TARGET_THUMB1_ONLY)
28038 /* push r3 so we can use it as a temporary. */
28039 /* TODO: Omit this save if r3 is not used. */
28040 fputs ("\tpush {r3}\n", file);
28041 fputs ("\tldr\tr3, ", file);
28043 else
28045 fputs ("\tldr\tr12, ", file);
28047 assemble_name (file, label);
28048 fputc ('\n', file);
28049 if (flag_pic)
28051 /* If we are generating PIC, the ldr instruction below loads
28052 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28053 the address of the add + 8, so we have:
28055 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28056 = target + 1.
28058 Note that we have "+ 1" because some versions of GNU ld
28059 don't set the low bit of the result for R_ARM_REL32
28060 relocations against thumb function symbols.
28061 On ARMv6M this is +4, not +8. */
28062 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28063 assemble_name (file, labelpc);
28064 fputs (":\n", file);
28065 if (TARGET_THUMB1_ONLY)
28067 /* This is 2 insns after the start of the thunk, so we know it
28068 is 4-byte aligned. */
28069 fputs ("\tadd\tr3, pc, r3\n", file);
28070 fputs ("\tmov r12, r3\n", file);
28072 else
28073 fputs ("\tadd\tr12, pc, r12\n", file);
28075 else if (TARGET_THUMB1_ONLY)
28076 fputs ("\tmov r12, r3\n", file);
28078 if (TARGET_THUMB1_ONLY)
28080 if (mi_delta > 255)
28082 fputs ("\tldr\tr3, ", file);
28083 assemble_name (file, label);
28084 fputs ("+4\n", file);
28085 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28086 mi_op, this_regno, this_regno);
28088 else if (mi_delta != 0)
28090 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28091 mi_op, this_regno, this_regno,
28092 mi_delta);
28095 else
28097 /* TODO: Use movw/movt for large constants when available. */
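/* The loop below peels the delta into 8-bit chunks aligned on even bit
   positions, so e.g. a delta of 0x401 is applied as an "add" of #1
   followed by an "add" of #0x400.  */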
28098 while (mi_delta != 0)
28100 if ((mi_delta & (3 << shift)) == 0)
28101 shift += 2;
28102 else
28104 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28105 mi_op, this_regno, this_regno,
28106 mi_delta & (0xff << shift));
28107 mi_delta &= ~(0xff << shift);
28108 shift += 8;
28112 if (TARGET_THUMB1)
28114 if (TARGET_THUMB1_ONLY)
28115 fputs ("\tpop\t{r3}\n", file);
28117 fprintf (file, "\tbx\tr12\n");
28118 ASM_OUTPUT_ALIGN (file, 2);
28119 assemble_name (file, label);
28120 fputs (":\n", file);
28121 if (flag_pic)
28123 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28124 rtx tem = XEXP (DECL_RTL (function), 0);
28125 tem = plus_constant (GET_MODE (tem), tem, -7);
28126 tem = gen_rtx_MINUS (GET_MODE (tem),
28127 tem,
28128 gen_rtx_SYMBOL_REF (Pmode,
28129 ggc_strdup (labelpc)));
28130 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28132 else
28133 /* Output ".word .LTHUNKn". */
28134 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28136 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28137 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28139 else
28141 fputs ("\tb\t", file);
28142 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28143 if (NEED_PLT_RELOC)
28144 fputs ("(PLT)", file);
28145 fputc ('\n', file);
28148 final_end_function ();
int
28152 arm_emit_vector_const (FILE *file, rtx x)
28154 int i;
28155 const char * pattern;
28157 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28159 switch (GET_MODE (x))
28161 case V2SImode: pattern = "%08x"; break;
28162 case V4HImode: pattern = "%04x"; break;
28163 case V8QImode: pattern = "%02x"; break;
28164 default: gcc_unreachable ();
28167 fprintf (file, "0x");
28168 for (i = CONST_VECTOR_NUNITS (x); i--;)
28170 rtx element;
28172 element = CONST_VECTOR_ELT (x, i);
28173 fprintf (file, pattern, INTVAL (element));
28176 return 1;
28179 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28180 HFmode constant pool entries are actually loaded with ldr. */
28181 void
28182 arm_emit_fp16_const (rtx c)
28184 REAL_VALUE_TYPE r;
28185 long bits;
28187 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28188 bits = real_to_target (NULL, &r, HFmode);
28189 if (WORDS_BIG_ENDIAN)
28190 assemble_zeros (2);
28191 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28192 if (!WORDS_BIG_ENDIAN)
28193 assemble_zeros (2);
28196 const char *
28197 arm_output_load_gr (rtx *operands)
28199 rtx reg;
28200 rtx offset;
28201 rtx wcgr;
28202 rtx sum;
28204 if (!MEM_P (operands [1])
28205 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28206 || !REG_P (reg = XEXP (sum, 0))
28207 || !CONST_INT_P (offset = XEXP (sum, 1))
28208 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28209 return "wldrw%?\t%0, %1";
28211 /* Fix up an out-of-range load of a GR register. */
28212 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28213 wcgr = operands[0];
28214 operands[0] = reg;
28215 output_asm_insn ("ldr%?\t%0, %1", operands);
28217 operands[0] = wcgr;
28218 operands[1] = reg;
28219 output_asm_insn ("tmcr%?\t%0, %1", operands);
28220 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28222 return "";
28225 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28227 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28228 named arg and all anonymous args onto the stack.
28229 XXX I know the prologue shouldn't be pushing registers, but it is faster
28230 that way. */
28232 static void
28233 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28234 enum machine_mode mode,
28235 tree type,
28236 int *pretend_size,
28237 int second_time ATTRIBUTE_UNUSED)
28239 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28240 int nregs;
28242 cfun->machine->uses_anonymous_args = 1;
28243 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28245 nregs = pcum->aapcs_ncrn;
28246 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28247 nregs++;
28249 else
28250 nregs = pcum->nregs;
28252 if (nregs < NUM_ARG_REGS)
28253 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28256 /* We can't rely on the caller doing the proper promotion when
28257 using APCS or ATPCS. */
28259 static bool
28260 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28262 return !TARGET_AAPCS_BASED;
28265 static enum machine_mode
28266 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28267 enum machine_mode mode,
28268 int *punsignedp ATTRIBUTE_UNUSED,
28269 const_tree fntype ATTRIBUTE_UNUSED,
28270 int for_return ATTRIBUTE_UNUSED)
28272 if (GET_MODE_CLASS (mode) == MODE_INT
28273 && GET_MODE_SIZE (mode) < 4)
28274 return SImode;
28276 return mode;
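/* In other words, sub-word integer arguments and return values (QImode,
   HImode) are widened to SImode; everything else is left untouched.  */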
28279 /* AAPCS based ABIs use short enums by default. */
28281 static bool
28282 arm_default_short_enums (void)
28284 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28288 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28290 static bool
28291 arm_align_anon_bitfield (void)
28293 return TARGET_AAPCS_BASED;
28297 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28299 static tree
28300 arm_cxx_guard_type (void)
28302 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28306 /* The EABI says test the least significant bit of a guard variable. */
28308 static bool
28309 arm_cxx_guard_mask_bit (void)
28311 return TARGET_AAPCS_BASED;
28315 /* The EABI specifies that all array cookies are 8 bytes long. */
28317 static tree
28318 arm_get_cookie_size (tree type)
28320 tree size;
28322 if (!TARGET_AAPCS_BASED)
28323 return default_cxx_get_cookie_size (type);
28325 size = build_int_cst (sizetype, 8);
28326 return size;
28330 /* The EABI says that array cookies should also contain the element size. */
28332 static bool
28333 arm_cookie_has_size (void)
28335 return TARGET_AAPCS_BASED;
28339 /* The EABI says constructors and destructors should return a pointer to
28340 the object constructed/destroyed. */
28342 static bool
28343 arm_cxx_cdtor_returns_this (void)
28345 return TARGET_AAPCS_BASED;
28348 /* The EABI says that an inline function may never be the key
28349 method. */
28351 static bool
28352 arm_cxx_key_method_may_be_inline (void)
28354 return !TARGET_AAPCS_BASED;
28357 static void
28358 arm_cxx_determine_class_data_visibility (tree decl)
28360 if (!TARGET_AAPCS_BASED
28361 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28362 return;
28364 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28365 is exported. However, on systems without dynamic vague linkage,
28366 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28367 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28368 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28369 else
28370 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28371 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28374 static bool
28375 arm_cxx_class_data_always_comdat (void)
28377 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28378 vague linkage if the class has no key function. */
28379 return !TARGET_AAPCS_BASED;
28383 /* The EABI says __aeabi_atexit should be used to register static
28384 destructors. */
28386 static bool
28387 arm_cxx_use_aeabi_atexit (void)
28389 return TARGET_AAPCS_BASED;
28393 void
28394 arm_set_return_address (rtx source, rtx scratch)
28396 arm_stack_offsets *offsets;
28397 HOST_WIDE_INT delta;
28398 rtx addr;
28399 unsigned long saved_regs;
28401 offsets = arm_get_frame_offsets ();
28402 saved_regs = offsets->saved_regs_mask;
28404 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28405 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28406 else
28408 if (frame_pointer_needed)
28409 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28410 else
28412 /* LR will be the first saved register. */
28413 delta = offsets->outgoing_args - (offsets->frame + 4);
28416 if (delta >= 4096)
28418 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28419 GEN_INT (delta & ~4095)));
28420 addr = scratch;
28421 delta &= 4095;
28423 else
28424 addr = stack_pointer_rtx;
28426 addr = plus_constant (Pmode, addr, delta);
28428 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28433 void
28434 thumb_set_return_address (rtx source, rtx scratch)
28436 arm_stack_offsets *offsets;
28437 HOST_WIDE_INT delta;
28438 HOST_WIDE_INT limit;
28439 int reg;
28440 rtx addr;
28441 unsigned long mask;
28443 emit_use (source);
28445 offsets = arm_get_frame_offsets ();
28446 mask = offsets->saved_regs_mask;
28447 if (mask & (1 << LR_REGNUM))
28449 limit = 1024;
28450 /* Find the saved regs. */
28451 if (frame_pointer_needed)
28453 delta = offsets->soft_frame - offsets->saved_args;
28454 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28455 if (TARGET_THUMB1)
28456 limit = 128;
28458 else
28460 delta = offsets->outgoing_args - offsets->saved_args;
28461 reg = SP_REGNUM;
28463 /* Allow for the stack frame. */
28464 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28465 delta -= 16;
28466 /* The link register is always the first saved register. */
28467 delta -= 4;
28469 /* Construct the address. */
28470 addr = gen_rtx_REG (SImode, reg);
28471 if (delta > limit)
28473 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28474 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28475 addr = scratch;
28477 else
28478 addr = plus_constant (Pmode, addr, delta);
28480 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28482 else
28483 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28486 /* Implements target hook vector_mode_supported_p. */
28487 bool
28488 arm_vector_mode_supported_p (enum machine_mode mode)
28490 /* Neon also supports V2SImode, etc. listed in the clause below. */
28491 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28492 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28493 return true;
28495 if ((TARGET_NEON || TARGET_IWMMXT)
28496 && ((mode == V2SImode)
28497 || (mode == V4HImode)
28498 || (mode == V8QImode)))
28499 return true;
28501 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28502 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28503 || mode == V2HAmode))
28504 return true;
28506 return false;
28509 /* Implements target hook array_mode_supported_p. */
28511 static bool
28512 arm_array_mode_supported_p (enum machine_mode mode,
28513 unsigned HOST_WIDE_INT nelems)
28515 if (TARGET_NEON
28516 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28517 && (nelems >= 2 && nelems <= 4))
28518 return true;
28520 return false;
28523 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28524 registers when autovectorizing for Neon, at least until multiple vector
28525 widths are supported properly by the middle-end. */
28527 static enum machine_mode
28528 arm_preferred_simd_mode (enum machine_mode mode)
28530 if (TARGET_NEON)
28531 switch (mode)
28533 case SFmode:
28534 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28535 case SImode:
28536 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28537 case HImode:
28538 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28539 case QImode:
28540 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28541 case DImode:
28542 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28543 return V2DImode;
28544 break;
28546 default:;
28549 if (TARGET_REALLY_IWMMXT)
28550 switch (mode)
28552 case SImode:
28553 return V2SImode;
28554 case HImode:
28555 return V4HImode;
28556 case QImode:
28557 return V8QImode;
28559 default:;
28562 return word_mode;
28565 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28567 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28568 using r0-r4 for function arguments, r7 for the stack frame, and not have
28569 enough left over to do doubleword arithmetic. For Thumb-2 all the
28570 potentially problematic instructions accept high registers so this is not
28571 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28572 that require many low registers. */
28573 static bool
28574 arm_class_likely_spilled_p (reg_class_t rclass)
28576 if ((TARGET_THUMB1 && rclass == LO_REGS)
28577 || rclass == CC_REG)
28578 return true;
28580 return false;
28583 /* Implements target hook small_register_classes_for_mode_p. */
28584 bool
28585 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
28587 return TARGET_THUMB1;
28590 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28591 ARM insns and therefore guarantee that the shift count is modulo 256.
28592 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28593 guarantee no particular behavior for out-of-range counts. */
28595 static unsigned HOST_WIDE_INT
28596 arm_shift_truncation_mask (enum machine_mode mode)
28598 return mode == SImode ? 255 : 0;
28602 /* Map internal gcc register numbers to DWARF2 register numbers. */
28604 unsigned int
28605 arm_dbx_register_number (unsigned int regno)
28607 if (regno < 16)
28608 return regno;
28610 if (IS_VFP_REGNUM (regno))
28612 /* See comment in arm_dwarf_register_span. */
28613 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28614 return 64 + regno - FIRST_VFP_REGNUM;
28615 else
28616 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28619 if (IS_IWMMXT_GR_REGNUM (regno))
28620 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28622 if (IS_IWMMXT_REGNUM (regno))
28623 return 112 + regno - FIRST_IWMMXT_REGNUM;
28625 gcc_unreachable ();
28628 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28629 GCC models them as 64 32-bit registers, so we need to describe this to
28630 the DWARF generation code. Other registers can use the default. */
28631 static rtx
28632 arm_dwarf_register_span (rtx rtl)
28634 enum machine_mode mode;
28635 unsigned regno;
28636 rtx parts[8];
28637 int nregs;
28638 int i;
28640 regno = REGNO (rtl);
28641 if (!IS_VFP_REGNUM (regno))
28642 return NULL_RTX;
28644 /* XXX FIXME: The EABI defines two VFP register ranges:
28645 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28646 256-287: D0-D31
28647 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28648 corresponding D register. Until GDB supports this, we shall use the
28649 legacy encodings. We also use these encodings for D0-D15 for
28650 compatibility with older debuggers. */
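/* Under that scheme a DFmode value in d8, say, is described as the two
   SImode halves s16/s17 rather than as one 64-bit register, while d16-d31
   (which have no single-precision aliases) use the 256+ numbering.  */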
28651 mode = GET_MODE (rtl);
28652 if (GET_MODE_SIZE (mode) < 8)
28653 return NULL_RTX;
28655 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28657 nregs = GET_MODE_SIZE (mode) / 4;
28658 for (i = 0; i < nregs; i += 2)
28659 if (TARGET_BIG_END)
28661 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28662 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28664 else
28666 parts[i] = gen_rtx_REG (SImode, regno + i);
28667 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28670 else
28672 nregs = GET_MODE_SIZE (mode) / 8;
28673 for (i = 0; i < nregs; i++)
28674 parts[i] = gen_rtx_REG (DImode, regno + i);
28677 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
28680 #if ARM_UNWIND_INFO
28681 /* Emit unwind directives for a store-multiple instruction or stack pointer
28682 push during alignment.
28683 These should only ever be generated by the function prologue code, so
28684 expect them to have a particular form. */
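/* For a prologue push such as "push {r4, r5, lr}" this is expected to emit
   "\t.save {r4, r5, lr}", and "\t.vsave {...}" for VFP stores; a push of PC
   done purely for stack alignment becomes an extra "\t.pad #4".  */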
28686 static void
28687 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
28689 int i;
28690 HOST_WIDE_INT offset;
28691 HOST_WIDE_INT nregs;
28692 int reg_size;
28693 unsigned reg;
28694 unsigned lastreg;
28695 rtx e;
28697 e = XVECEXP (p, 0, 0);
28698 if (GET_CODE (e) != SET)
28699 abort ();
28701 /* First insn will adjust the stack pointer. */
28702 if (GET_CODE (e) != SET
28703 || !REG_P (XEXP (e, 0))
28704 || REGNO (XEXP (e, 0)) != SP_REGNUM
28705 || GET_CODE (XEXP (e, 1)) != PLUS)
28706 abort ();
28708 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
28709 nregs = XVECLEN (p, 0) - 1;
28711 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
28712 if (reg < 16)
28714 /* The function prologue may also push pc, but not annotate it as it is
28715 never restored. We turn this into a stack pointer adjustment. */
28716 if (nregs * 4 == offset - 4)
28718 fprintf (asm_out_file, "\t.pad #4\n");
28719 offset -= 4;
28721 reg_size = 4;
28722 fprintf (asm_out_file, "\t.save {");
28724 else if (IS_VFP_REGNUM (reg))
28726 reg_size = 8;
28727 fprintf (asm_out_file, "\t.vsave {");
28729 else
28730 /* Unknown register type. */
28731 abort ();
28733 /* If the stack increment doesn't match the size of the saved registers,
28734 something has gone horribly wrong. */
28735 if (offset != nregs * reg_size)
28736 abort ();
28738 offset = 0;
28739 lastreg = 0;
28740 /* The remaining insns will describe the stores. */
28741 for (i = 1; i <= nregs; i++)
28743 /* Expect (set (mem <addr>) (reg)).
28744 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28745 e = XVECEXP (p, 0, i);
28746 if (GET_CODE (e) != SET
28747 || !MEM_P (XEXP (e, 0))
28748 || !REG_P (XEXP (e, 1)))
28749 abort ();
28751 reg = REGNO (XEXP (e, 1));
28752 if (reg < lastreg)
28753 abort ();
28755 if (i != 1)
28756 fprintf (asm_out_file, ", ");
28757 /* We can't use %r for vfp because we need to use the
28758 double precision register names. */
28759 if (IS_VFP_REGNUM (reg))
28760 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
28761 else
28762 asm_fprintf (asm_out_file, "%r", reg);
28764 #ifdef ENABLE_CHECKING
28765 /* Check that the addresses are consecutive. */
28766 e = XEXP (XEXP (e, 0), 0);
28767 if (GET_CODE (e) == PLUS)
28769 offset += reg_size;
28770 if (!REG_P (XEXP (e, 0))
28771 || REGNO (XEXP (e, 0)) != SP_REGNUM
28772 || !CONST_INT_P (XEXP (e, 1))
28773 || offset != INTVAL (XEXP (e, 1)))
28774 abort ();
28776 else if (i != 1
28777 || !REG_P (e)
28778 || REGNO (e) != SP_REGNUM)
28779 abort ();
28780 #endif
28782 fprintf (asm_out_file, "}\n");
28785 /* Emit unwind directives for a SET. */
28787 static void
28788 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
28790 rtx e0;
28791 rtx e1;
28792 unsigned reg;
28794 e0 = XEXP (p, 0);
28795 e1 = XEXP (p, 1);
28796 switch (GET_CODE (e0))
28798 case MEM:
28799 /* Pushing a single register. */
28800 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
28801 || !REG_P (XEXP (XEXP (e0, 0), 0))
28802 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
28803 abort ();
28805 asm_fprintf (asm_out_file, "\t.save ");
28806 if (IS_VFP_REGNUM (REGNO (e1)))
28807 asm_fprintf(asm_out_file, "{d%d}\n",
28808 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
28809 else
28810 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
28811 break;
28813 case REG:
28814 if (REGNO (e0) == SP_REGNUM)
28816 /* A stack increment. */
28817 if (GET_CODE (e1) != PLUS
28818 || !REG_P (XEXP (e1, 0))
28819 || REGNO (XEXP (e1, 0)) != SP_REGNUM
28820 || !CONST_INT_P (XEXP (e1, 1)))
28821 abort ();
28823 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
28824 -INTVAL (XEXP (e1, 1)));
28826 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
28828 HOST_WIDE_INT offset;
28830 if (GET_CODE (e1) == PLUS)
28832 if (!REG_P (XEXP (e1, 0))
28833 || !CONST_INT_P (XEXP (e1, 1)))
28834 abort ();
28835 reg = REGNO (XEXP (e1, 0));
28836 offset = INTVAL (XEXP (e1, 1));
28837 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
28838 HARD_FRAME_POINTER_REGNUM, reg,
28839 offset);
28841 else if (REG_P (e1))
28843 reg = REGNO (e1);
28844 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
28845 HARD_FRAME_POINTER_REGNUM, reg);
28847 else
28848 abort ();
28850 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
28852 /* Move from sp to reg. */
28853 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
28855 else if (GET_CODE (e1) == PLUS
28856 && REG_P (XEXP (e1, 0))
28857 && REGNO (XEXP (e1, 0)) == SP_REGNUM
28858 && CONST_INT_P (XEXP (e1, 1)))
28860 /* Set reg to offset from sp. */
28861 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
28862 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
28864 else
28865 abort ();
28866 break;
28868 default:
28869 abort ();
28874 /* Emit unwind directives for the given insn. */
28876 static void
28877 arm_unwind_emit (FILE * asm_out_file, rtx insn)
28879 rtx note, pat;
28880 bool handled_one = false;
28882 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28883 return;
28885 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28886 && (TREE_NOTHROW (current_function_decl)
28887 || crtl->all_throwers_are_sibcalls))
28888 return;
28890 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
28891 return;
28893 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
28895 switch (REG_NOTE_KIND (note))
28897 case REG_FRAME_RELATED_EXPR:
28898 pat = XEXP (note, 0);
28899 goto found;
28901 case REG_CFA_REGISTER:
28902 pat = XEXP (note, 0);
28903 if (pat == NULL)
28905 pat = PATTERN (insn);
28906 if (GET_CODE (pat) == PARALLEL)
28907 pat = XVECEXP (pat, 0, 0);
28910 /* Only emitted for IS_STACKALIGN re-alignment. */
28912 rtx dest, src;
28913 unsigned reg;
28915 src = SET_SRC (pat);
28916 dest = SET_DEST (pat);
28918 gcc_assert (src == stack_pointer_rtx);
28919 reg = REGNO (dest);
28920 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28921 reg + 0x90, reg);
28923 handled_one = true;
28924 break;
28926 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
28927 to get correct DWARF information for shrink-wrapping. We should not
28928 emit unwind information for it because it is used either for
28929 pretend arguments or for notes that adjust sp and restore registers
28930 from the stack. */
28931 case REG_CFA_ADJUST_CFA:
28932 case REG_CFA_RESTORE:
28933 return;
28935 case REG_CFA_DEF_CFA:
28936 case REG_CFA_EXPRESSION:
28937 case REG_CFA_OFFSET:
28938 /* ??? Only handling here what we actually emit. */
28939 gcc_unreachable ();
28941 default:
28942 break;
28945 if (handled_one)
28946 return;
28947 pat = PATTERN (insn);
28948 found:
28950 switch (GET_CODE (pat))
28952 case SET:
28953 arm_unwind_emit_set (asm_out_file, pat);
28954 break;
28956 case SEQUENCE:
28957 /* Store multiple. */
28958 arm_unwind_emit_sequence (asm_out_file, pat);
28959 break;
28961 default:
28962 abort();
28967 /* Output a reference from a function exception table to the type_info
28968 object X. The EABI specifies that the symbol should be relocated by
28969 an R_ARM_TARGET2 relocation. */
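/* For example (the symbol is purely illustrative), a reference to the
   typeinfo object _ZTIi would be emitted as
	.word	_ZTIi(TARGET2)
   while a constant integer entry is emitted as a plain .word without
   the decoration.  */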
28971 static bool
28972 arm_output_ttype (rtx x)
28974 fputs ("\t.word\t", asm_out_file);
28975 output_addr_const (asm_out_file, x);
28976 /* Use special relocations for symbol references. */
28977 if (!CONST_INT_P (x))
28978 fputs ("(TARGET2)", asm_out_file);
28979 fputc ('\n', asm_out_file);
28981 return TRUE;
28984 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
28986 static void
28987 arm_asm_emit_except_personality (rtx personality)
28989 fputs ("\t.personality\t", asm_out_file);
28990 output_addr_const (asm_out_file, personality);
28991 fputc ('\n', asm_out_file);
28994 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
28996 static void
28997 arm_asm_init_sections (void)
28999 exception_section = get_unnamed_section (0, output_section_asm_op,
29000 "\t.handlerdata");
29002 #endif /* ARM_UNWIND_INFO */
29004 /* Output unwind directives for the start/end of a function. */
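/* Sketch of the resulting bracketing (the directives in between come from
   arm_unwind_emit and friends):
	.fnstart
	  .save/.pad/.setfp ...   @ emitted as the prologue is output
	  .cantunwind             @ only when the function can never be unwound
	.fnend  */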
29006 void
29007 arm_output_fn_unwind (FILE * f, bool prologue)
29009 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29010 return;
29012 if (prologue)
29013 fputs ("\t.fnstart\n", f);
29014 else
29016 /* If this function will never be unwound, then mark it as such.
29017 The same condition is used in arm_unwind_emit to suppress
29018 the frame annotations. */
29019 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29020 && (TREE_NOTHROW (current_function_decl)
29021 || crtl->all_throwers_are_sibcalls))
29022 fputs("\t.cantunwind\n", f);
29024 fputs ("\t.fnend\n", f);
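/* Output the operand of an UNSPEC_TLS, decorated with the relocation
   suffix that matches its TLS access model (e.g. "(tlsgd)" or
   "(gottpoff)"); used from arm_output_addr_const_extra below.  */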
29028 static bool
29029 arm_emit_tls_decoration (FILE *fp, rtx x)
29031 enum tls_reloc reloc;
29032 rtx val;
29034 val = XVECEXP (x, 0, 0);
29035 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29037 output_addr_const (fp, val);
29039 switch (reloc)
29041 case TLS_GD32:
29042 fputs ("(tlsgd)", fp);
29043 break;
29044 case TLS_LDM32:
29045 fputs ("(tlsldm)", fp);
29046 break;
29047 case TLS_LDO32:
29048 fputs ("(tlsldo)", fp);
29049 break;
29050 case TLS_IE32:
29051 fputs ("(gottpoff)", fp);
29052 break;
29053 case TLS_LE32:
29054 fputs ("(tpoff)", fp);
29055 break;
29056 case TLS_DESCSEQ:
29057 fputs ("(tlsdesc)", fp);
29058 break;
29059 default:
29060 gcc_unreachable ();
29063 switch (reloc)
29065 case TLS_GD32:
29066 case TLS_LDM32:
29067 case TLS_IE32:
29068 case TLS_DESCSEQ:
29069 fputs (" + (. - ", fp);
29070 output_addr_const (fp, XVECEXP (x, 0, 2));
29071 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29072 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29073 output_addr_const (fp, XVECEXP (x, 0, 3));
29074 fputc (')', fp);
29075 break;
29076 default:
29077 break;
29080 return TRUE;
29083 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29085 static void
29086 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29088 gcc_assert (size == 4);
29089 fputs ("\t.word\t", file);
29090 output_addr_const (file, x);
29091 fputs ("(tlsldo)", file);
29094 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29096 static bool
29097 arm_output_addr_const_extra (FILE *fp, rtx x)
29099 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29100 return arm_emit_tls_decoration (fp, x);
29101 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29103 char label[256];
29104 int labelno = INTVAL (XVECEXP (x, 0, 0));
29106 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29107 assemble_name_raw (fp, label);
29109 return TRUE;
29111 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29113 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29114 if (GOT_PCREL)
29115 fputs ("+.", fp);
29116 fputs ("-(", fp);
29117 output_addr_const (fp, XVECEXP (x, 0, 0));
29118 fputc (')', fp);
29119 return TRUE;
29121 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29123 output_addr_const (fp, XVECEXP (x, 0, 0));
29124 if (GOT_PCREL)
29125 fputs ("+.", fp);
29126 fputs ("-(", fp);
29127 output_addr_const (fp, XVECEXP (x, 0, 1));
29128 fputc (')', fp);
29129 return TRUE;
29131 else if (GET_CODE (x) == CONST_VECTOR)
29132 return arm_emit_vector_const (fp, x);
29134 return FALSE;
29137 /* Output assembly for a shift instruction.
29138 SET_FLAGS determines how the instruction modifies the condition codes.
29139 0 - Do not set condition codes.
29140 1 - Set condition codes.
29141 2 - Use smallest instruction. */
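/* Rough illustration (operand values are hypothetical): for a left shift
   by a constant under unified assembly, shift_op returns "lsl" and the
   template built here is "lsl%?\t%0, %1, %2" (set_flags == 0) or
   "lsl%.\t%0, %1, %2" (set_flags == 1), printing as e.g.
	lsl	r0, r1, #2
	lsls	r0, r1, #2  */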
29142 const char *
29143 arm_output_shift(rtx * operands, int set_flags)
29145 char pattern[100];
29146 static const char flag_chars[3] = {'?', '.', '!'};
29147 const char *shift;
29148 HOST_WIDE_INT val;
29149 char c;
29151 c = flag_chars[set_flags];
29152 if (TARGET_UNIFIED_ASM)
29154 shift = shift_op(operands[3], &val);
29155 if (shift)
29157 if (val != -1)
29158 operands[2] = GEN_INT(val);
29159 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29161 else
29162 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29164 else
29165 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29166 output_asm_insn (pattern, operands);
29167 return "";
29170 /* Output assembly for a WMMX immediate shift instruction. */
29171 const char *
29172 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29174 int shift = INTVAL (operands[2]);
29175 char templ[50];
29176 enum machine_mode opmode = GET_MODE (operands[0]);
29178 gcc_assert (shift >= 0);
29180 /* Handle a shift value that exceeds the range of the operation: > 63
29181 for the D qualifier, > 31 for the W qualifier, or > 15 for the H qualifier. */
29182 if (((opmode == V4HImode) && (shift > 15))
29183 || ((opmode == V2SImode) && (shift > 31))
29184 || ((opmode == DImode) && (shift > 63)))
29186 if (wror_or_wsra)
29188 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29189 output_asm_insn (templ, operands);
29190 if (opmode == DImode)
29192 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29193 output_asm_insn (templ, operands);
29196 else
29198 /* The destination register will contain all zeros. */
29199 sprintf (templ, "wzero\t%%0");
29200 output_asm_insn (templ, operands);
29202 return "";
29205 if ((opmode == DImode) && (shift > 32))
29207 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29208 output_asm_insn (templ, operands);
29209 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29210 output_asm_insn (templ, operands);
29212 else
29214 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29215 output_asm_insn (templ, operands);
29217 return "";
29220 /* Output assembly for a WMMX tinsr instruction. */
29221 const char *
29222 arm_output_iwmmxt_tinsr (rtx *operands)
29224 int mask = INTVAL (operands[3]);
29225 int i;
29226 char templ[50];
29227 int units = mode_nunits[GET_MODE (operands[0])];
29228 gcc_assert ((mask & (mask - 1)) == 0);
29229 for (i = 0; i < units; ++i)
29231 if ((mask & 0x01) == 1)
29233 break;
29235 mask >>= 1;
29237 gcc_assert (i < units);
29239 switch (GET_MODE (operands[0]))
29241 case V8QImode:
29242 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29243 break;
29244 case V4HImode:
29245 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29246 break;
29247 case V2SImode:
29248 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29249 break;
29250 default:
29251 gcc_unreachable ();
29252 break;
29254 output_asm_insn (templ, operands);
29256 return "";
29259 /* Output a Thumb-1 casesi dispatch sequence. */
29260 const char *
29261 thumb1_output_casesi (rtx *operands)
29263 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29265 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29267 switch (GET_MODE(diff_vec))
29269 case QImode:
29270 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29271 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29272 case HImode:
29273 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29274 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29275 case SImode:
29276 return "bl\t%___gnu_thumb1_case_si";
29277 default:
29278 gcc_unreachable ();
29282 /* Output a Thumb-2 casesi instruction. */
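/* Illustrative output for a QImode dispatch table (registers and labels
   are hypothetical):
	cmp	r0, #9		@ index against the table bound
	bhi	.Ldefault	@ out of range: take the default label
	tbb	[pc, r0]	@ branch via the byte offset table
   HImode tables use "tbh [pc, r0, lsl #1]" instead.  */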
29283 const char *
29284 thumb2_output_casesi (rtx *operands)
29286 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29288 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29290 output_asm_insn ("cmp\t%0, %1", operands);
29291 output_asm_insn ("bhi\t%l3", operands);
29292 switch (GET_MODE(diff_vec))
29294 case QImode:
29295 return "tbb\t[%|pc, %0]";
29296 case HImode:
29297 return "tbh\t[%|pc, %0, lsl #1]";
29298 case SImode:
29299 if (flag_pic)
29301 output_asm_insn ("adr\t%4, %l2", operands);
29302 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29303 output_asm_insn ("add\t%4, %4, %5", operands);
29304 return "bx\t%4";
29306 else
29308 output_asm_insn ("adr\t%4, %l2", operands);
29309 return "ldr\t%|pc, [%4, %0, lsl #2]";
29311 default:
29312 gcc_unreachable ();
29316 /* Most ARM cores are single issue, but some newer ones can dual issue.
29317 The scheduler descriptions rely on this being correct. */
29318 static int
29319 arm_issue_rate (void)
29321 switch (arm_tune)
29323 case cortexa15:
29324 return 3;
29326 case cortexr4:
29327 case cortexr4f:
29328 case cortexr5:
29329 case genericv7a:
29330 case cortexa5:
29331 case cortexa7:
29332 case cortexa8:
29333 case cortexa9:
29334 case cortexa12:
29335 case cortexa53:
29336 case fa726te:
29337 case marvell_pj4:
29338 return 2;
29340 default:
29341 return 1;
29345 /* A table and a function to perform ARM-specific name mangling for
29346 NEON vector types in order to conform to the AAPCS (see "Procedure
29347 Call Standard for the ARM Architecture", Appendix A). To qualify
29348 for emission with the mangled names defined in that document, a
29349 vector type must not only be of the correct mode but also be
29350 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29351 typedef struct
29353 enum machine_mode mode;
29354 const char *element_type_name;
29355 const char *aapcs_name;
29356 } arm_mangle_map_entry;
29358 static arm_mangle_map_entry arm_mangle_map[] = {
29359 /* 64-bit containerized types. */
29360 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29361 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29362 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29363 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29364 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29365 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29366 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29367 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29368 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29369 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29371 /* 128-bit containerized types. */
29372 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29373 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29374 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29375 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29376 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29377 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29378 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29379 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29380 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29381 { VOIDmode, NULL, NULL }
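/* For example, under this scheme a parameter of type float32x4_t
   (a V4SFmode vector of __builtin_neon_sf) is mangled as
   "19__simd128_float32_t", so "void f (float32x4_t)" becomes
   _Z1f19__simd128_float32_t (the function name is illustrative only).  */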
29384 const char *
29385 arm_mangle_type (const_tree type)
29387 arm_mangle_map_entry *pos = arm_mangle_map;
29389 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29390 has to be mangled as if it were in the "std" namespace. */
29391 if (TARGET_AAPCS_BASED
29392 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29393 return "St9__va_list";
29395 /* Half-precision float. */
29396 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29397 return "Dh";
29399 if (TREE_CODE (type) != VECTOR_TYPE)
29400 return NULL;
29402 /* Check the mode of the vector type, and the name of the vector
29403 element type, against the table. */
29404 while (pos->mode != VOIDmode)
29406 tree elt_type = TREE_TYPE (type);
29408 if (pos->mode == TYPE_MODE (type)
29409 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29410 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29411 pos->element_type_name))
29412 return pos->aapcs_name;
29414 pos++;
29417 /* Use the default mangling for unrecognized (possibly user-defined)
29418 vector types. */
29419 return NULL;
29422 /* Order of allocation of core registers for Thumb: this allocation is
29423 written over the corresponding initial entries of the array
29424 initialized with REG_ALLOC_ORDER. We allocate all low registers
29425 first. Saving and restoring a low register is usually cheaper than
29426 using a call-clobbered high register. */
29428 static const int thumb_core_reg_alloc_order[] =
29430 3, 2, 1, 0, 4, 5, 6, 7,
29431 14, 12, 8, 9, 10, 11
29434 /* Adjust register allocation order when compiling for Thumb. */
29436 void
29437 arm_order_regs_for_local_alloc (void)
29439 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29440 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29441 if (TARGET_THUMB)
29442 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29443 sizeof (thumb_core_reg_alloc_order));
29446 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29448 bool
29449 arm_frame_pointer_required (void)
29451 return (cfun->has_nonlocal_label
29452 || SUBTARGET_FRAME_POINTER_REQUIRED
29453 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29456 /* Only Thumb-1 lacks conditional execution, so return true if
29457 the target is not Thumb-1. */
29458 static bool
29459 arm_have_conditional_execution (void)
29461 return !TARGET_THUMB1;
29464 tree
29465 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29467 enum machine_mode in_mode, out_mode;
29468 int in_n, out_n;
29470 if (TREE_CODE (type_out) != VECTOR_TYPE
29471 || TREE_CODE (type_in) != VECTOR_TYPE
29472 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
29473 return NULL_TREE;
29475 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29476 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29477 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29478 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29480 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29481 decl of the vectorized builtin for the appropriate vector mode.
29482 NULL_TREE is returned if no such builtin is available. */
29483 #undef ARM_CHECK_BUILTIN_MODE
29484 #define ARM_CHECK_BUILTIN_MODE(C) \
29485 (out_mode == SFmode && out_n == C \
29486 && in_mode == SFmode && in_n == C)
29488 #undef ARM_FIND_VRINT_VARIANT
29489 #define ARM_FIND_VRINT_VARIANT(N) \
29490 (ARM_CHECK_BUILTIN_MODE (2) \
29491 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29492 : (ARM_CHECK_BUILTIN_MODE (4) \
29493 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29494 : NULL_TREE))
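/* For instance, a call to floorf vectorized over four floats
   (V4SFmode in and out) maps through ARM_FIND_VRINT_VARIANT (vrintm)
   to the decl of ARM_BUILTIN_NEON_vrintmv4sf, i.e. the NEON
   round-towards-minus-infinity instruction.  */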
29496 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29498 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29499 switch (fn)
29501 case BUILT_IN_FLOORF:
29502 return ARM_FIND_VRINT_VARIANT (vrintm);
29503 case BUILT_IN_CEILF:
29504 return ARM_FIND_VRINT_VARIANT (vrintp);
29505 case BUILT_IN_TRUNCF:
29506 return ARM_FIND_VRINT_VARIANT (vrintz);
29507 case BUILT_IN_ROUNDF:
29508 return ARM_FIND_VRINT_VARIANT (vrinta);
29509 default:
29510 return NULL_TREE;
29513 return NULL_TREE;
29515 #undef ARM_CHECK_BUILTIN_MODE
29516 #undef ARM_FIND_VRINT_VARIANT
29518 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
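/* For example, a 16-byte NEON vector such as int32x4_t has a natural
   alignment of 128 bits, but for AAPCS-based targets this is capped at
   64 bits below.  */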
29519 static HOST_WIDE_INT
29520 arm_vector_alignment (const_tree type)
29522 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29524 if (TARGET_AAPCS_BASED)
29525 align = MIN (align, 64);
29527 return align;
29530 static unsigned int
29531 arm_autovectorize_vector_sizes (void)
29533 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
29536 static bool
29537 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29539 /* Vectors which aren't in packed structures will not be less aligned than
29540 the natural alignment of their element type, so this is safe. */
29541 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
29542 return !is_packed;
29544 return default_builtin_vector_alignment_reachable (type, is_packed);
29547 static bool
29548 arm_builtin_support_vector_misalignment (enum machine_mode mode,
29549 const_tree type, int misalignment,
29550 bool is_packed)
29552 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
29554 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29556 if (is_packed)
29557 return align == 1;
29559 /* If the misalignment is unknown, we should be able to handle the access
29560 so long as it is not to a member of a packed data structure. */
29561 if (misalignment == -1)
29562 return true;
29564 /* Return true if the misalignment is a multiple of the natural alignment
29565 of the vector's element type. This is probably always going to be
29566 true in practice, since we've already established that this isn't a
29567 packed access. */
29568 return ((misalignment % align) == 0);
29571 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29572 is_packed);
29575 static void
29576 arm_conditional_register_usage (void)
29578 int regno;
29580 if (TARGET_THUMB1 && optimize_size)
29582 /* When optimizing for size on Thumb-1, it's better not
29583 to use the HI regs, because of the overhead of
29584 stacking them. */
29585 for (regno = FIRST_HI_REGNUM;
29586 regno <= LAST_HI_REGNUM; ++regno)
29587 fixed_regs[regno] = call_used_regs[regno] = 1;
29590 /* The link register can be clobbered by any branch insn,
29591 but we have no way to track that at present, so mark
29592 it as unavailable. */
29593 if (TARGET_THUMB1)
29594 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29596 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
29598 /* VFPv3 registers are disabled when earlier VFP
29599 versions are selected due to the definition of
29600 LAST_VFP_REGNUM. */
29601 for (regno = FIRST_VFP_REGNUM;
29602 regno <= LAST_VFP_REGNUM; ++ regno)
29604 fixed_regs[regno] = 0;
29605 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29606 || regno >= FIRST_VFP_REGNUM + 32;
29610 if (TARGET_REALLY_IWMMXT)
29612 regno = FIRST_IWMMXT_GR_REGNUM;
29613 /* The 2002/10/09 revision of the XScale ABI has wCG0
29614 and wCG1 as call-preserved registers. The 2002/11/21
29615 revision changed this so that all wCG registers are
29616 scratch registers. */
29617 for (regno = FIRST_IWMMXT_GR_REGNUM;
29618 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29619 fixed_regs[regno] = 0;
29620 /* The XScale ABI has wR0 - wR9 as scratch registers,
29621 the rest as call-preserved registers. */
29622 for (regno = FIRST_IWMMXT_REGNUM;
29623 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29625 fixed_regs[regno] = 0;
29626 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29630 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29632 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29633 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29635 else if (TARGET_APCS_STACK)
29637 fixed_regs[10] = 1;
29638 call_used_regs[10] = 1;
29640 /* -mcaller-super-interworking reserves r11 for calls to
29641 _interwork_r11_call_via_rN(). Making the register global
29642 is an easy way of ensuring that it remains valid for all
29643 calls. */
29644 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29645 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29647 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29648 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29649 if (TARGET_CALLER_INTERWORKING)
29650 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29652 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29655 static reg_class_t
29656 arm_preferred_rename_class (reg_class_t rclass)
29658 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29659 using GENERAL_REGS. During the register-renaming pass we prefer
29660 LO_REGS, so code size can be reduced. */
29661 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
29662 return LO_REGS;
29663 else
29664 return NO_REGS;
29667 /* Compute the attribute "length" of insn "*push_multi".
29668 So this function MUST be kept in sync with that insn pattern. */
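/* For instance (hypothetical operands): a Thumb-2 "push {r4, r5, lr}"
   uses only low registers and lr, so the 16-bit encoding applies and the
   length is 2; pushing any other high register (e.g. r8) forces the
   32-bit encoding and a length of 4.  */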
29670 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
29672 int i, regno, hi_reg;
29673 int num_saves = XVECLEN (parallel_op, 0);
29675 /* ARM mode. */
29676 if (TARGET_ARM)
29677 return 4;
29678 /* Thumb1 mode. */
29679 if (TARGET_THUMB1)
29680 return 2;
29682 /* Thumb2 mode. */
29683 regno = REGNO (first_op);
29684 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29685 for (i = 1; i < num_saves && !hi_reg; i++)
29687 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
29688 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29691 if (!hi_reg)
29692 return 2;
29693 return 4;
29696 /* Compute the number of instructions emitted by output_move_double. */
29698 arm_count_output_move_double_insns (rtx *operands)
29700 int count;
29701 rtx ops[2];
29702 /* output_move_double may modify the operands array, so call it
29703 here on a copy of the array. */
29704 ops[0] = operands[0];
29705 ops[1] = operands[1];
29706 output_move_double (ops, false, &count);
29707 return count;
29711 vfp3_const_double_for_fract_bits (rtx operand)
29713 REAL_VALUE_TYPE r0;
29715 if (!CONST_DOUBLE_P (operand))
29716 return 0;
29718 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29719 if (exact_real_inverse (DFmode, &r0))
29721 if (exact_real_truncate (DFmode, &r0))
29723 HOST_WIDE_INT value = real_to_integer (&r0);
29724 value = value & 0xffffffff;
29725 if ((value != 0) && ( (value & (value - 1)) == 0))
29726 return int_log2 (value);
29729 return 0;
29733 vfp3_const_double_for_bits (rtx operand)
29735 REAL_VALUE_TYPE r0;
29737 if (!CONST_DOUBLE_P (operand))
29738 return 0;
29740 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29741 if (exact_real_truncate (DFmode, &r0))
29743 HOST_WIDE_INT value = real_to_integer (&r0);
29744 value = value & 0xffffffff;
29745 if ((value != 0) && ( (value & (value - 1)) == 0))
29746 return int_log2 (value);
29749 return 0;
29752 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29754 static void
29755 arm_pre_atomic_barrier (enum memmodel model)
29757 if (need_atomic_barrier_p (model, true))
29758 emit_insn (gen_memory_barrier ());
29761 static void
29762 arm_post_atomic_barrier (enum memmodel model)
29764 if (need_atomic_barrier_p (model, false))
29765 emit_insn (gen_memory_barrier ());
29768 /* Emit the load-exclusive and store-exclusive instructions.
29769 Use acquire and release versions if necessary. */
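/* For SImode this expands to plain "ldrex"/"strex"; QImode, HImode and
   DImode use the byte, halfword and doubleword variants.  When acquire
   or release semantics are requested (and the target provides them),
   the "ldaex"/"stlex" forms are generated instead.  */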
29771 static void
29772 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
29774 rtx (*gen) (rtx, rtx);
29776 if (acq)
29778 switch (mode)
29780 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
29781 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
29782 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
29783 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
29784 default:
29785 gcc_unreachable ();
29788 else
29790 switch (mode)
29792 case QImode: gen = gen_arm_load_exclusiveqi; break;
29793 case HImode: gen = gen_arm_load_exclusivehi; break;
29794 case SImode: gen = gen_arm_load_exclusivesi; break;
29795 case DImode: gen = gen_arm_load_exclusivedi; break;
29796 default:
29797 gcc_unreachable ();
29801 emit_insn (gen (rval, mem));
29804 static void
29805 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
29806 rtx mem, bool rel)
29808 rtx (*gen) (rtx, rtx, rtx);
29810 if (rel)
29812 switch (mode)
29814 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
29815 case HImode: gen = gen_arm_store_release_exclusivehi; break;
29816 case SImode: gen = gen_arm_store_release_exclusivesi; break;
29817 case DImode: gen = gen_arm_store_release_exclusivedi; break;
29818 default:
29819 gcc_unreachable ();
29822 else
29824 switch (mode)
29826 case QImode: gen = gen_arm_store_exclusiveqi; break;
29827 case HImode: gen = gen_arm_store_exclusivehi; break;
29828 case SImode: gen = gen_arm_store_exclusivesi; break;
29829 case DImode: gen = gen_arm_store_exclusivedi; break;
29830 default:
29831 gcc_unreachable ();
29835 emit_insn (gen (bval, rval, mem));
29838 /* Mark the previous jump instruction as unlikely. */
29840 static void
29841 emit_unlikely_jump (rtx insn)
29843 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
29845 insn = emit_jump_insn (insn);
29846 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
29849 /* Expand a compare and swap pattern. */
29851 void
29852 arm_expand_compare_and_swap (rtx operands[])
29854 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
29855 enum machine_mode mode;
29856 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
29858 bval = operands[0];
29859 rval = operands[1];
29860 mem = operands[2];
29861 oldval = operands[3];
29862 newval = operands[4];
29863 is_weak = operands[5];
29864 mod_s = operands[6];
29865 mod_f = operands[7];
29866 mode = GET_MODE (mem);
29868 /* Normally the succ memory model must be stronger than fail, but in the
29869 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29870 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29872 if (TARGET_HAVE_LDACQ
29873 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
29874 && INTVAL (mod_s) == MEMMODEL_RELEASE)
29875 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
29877 switch (mode)
29879 case QImode:
29880 case HImode:
29881 /* For narrow modes, we're going to perform the comparison in SImode,
29882 so do the zero-extension now. */
29883 rval = gen_reg_rtx (SImode);
29884 oldval = convert_modes (SImode, mode, oldval, true);
29885 /* FALLTHRU */
29887 case SImode:
29888 /* Force the value into a register if needed. We waited until after
29889 the zero-extension above to do this properly. */
29890 if (!arm_add_operand (oldval, SImode))
29891 oldval = force_reg (SImode, oldval);
29892 break;
29894 case DImode:
29895 if (!cmpdi_operand (oldval, mode))
29896 oldval = force_reg (mode, oldval);
29897 break;
29899 default:
29900 gcc_unreachable ();
29903 switch (mode)
29905 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
29906 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
29907 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
29908 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
29909 default:
29910 gcc_unreachable ();
29913 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
29915 if (mode == QImode || mode == HImode)
29916 emit_move_insn (operands[1], gen_lowpart (mode, rval));
29918 /* In all cases, we arrange for success to be signaled by Z set.
29919 This arrangement allows for the boolean result to be used directly
29920 in a subsequent branch, post optimization. */
29921 x = gen_rtx_REG (CCmode, CC_REGNUM);
29922 x = gen_rtx_EQ (SImode, x, const0_rtx);
29923 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
29926 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
29927 another memory store between the load-exclusive and store-exclusive can
29928 reset the monitor from Exclusive to Open state. This means we must wait
29929 until after reload to split the pattern, lest we get a register spill in
29930 the middle of the atomic sequence. */
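/* Rough shape of the split sequence for a strong SImode compare-and-swap
   (register names and labels are illustrative only):
	1:	ldrex	r0, [r2]	@ ldaex when acquire semantics are needed
		cmp	r0, r3		@ compare against the expected value
		bne	2f		@ mismatch: fail
		strex	r1, r4, [r2]	@ stlex for release semantics
		cmp	r1, #0		@ 0 means the store succeeded
		bne	1b		@ strong CAS retries on a spurious failure
	2:  */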
29932 void
29933 arm_split_compare_and_swap (rtx operands[])
29935 rtx rval, mem, oldval, newval, scratch;
29936 enum machine_mode mode;
29937 enum memmodel mod_s, mod_f;
29938 bool is_weak;
29939 rtx label1, label2, x, cond;
29941 rval = operands[0];
29942 mem = operands[1];
29943 oldval = operands[2];
29944 newval = operands[3];
29945 is_weak = (operands[4] != const0_rtx);
29946 mod_s = (enum memmodel) INTVAL (operands[5]);
29947 mod_f = (enum memmodel) INTVAL (operands[6]);
29948 scratch = operands[7];
29949 mode = GET_MODE (mem);
29951 bool use_acquire = TARGET_HAVE_LDACQ
29952 && !(mod_s == MEMMODEL_RELAXED
29953 || mod_s == MEMMODEL_CONSUME
29954 || mod_s == MEMMODEL_RELEASE);
29956 bool use_release = TARGET_HAVE_LDACQ
29957 && !(mod_s == MEMMODEL_RELAXED
29958 || mod_s == MEMMODEL_CONSUME
29959 || mod_s == MEMMODEL_ACQUIRE);
29961 /* Checks whether a barrier is needed and emits one accordingly. */
29962 if (!(use_acquire || use_release))
29963 arm_pre_atomic_barrier (mod_s);
29965 label1 = NULL_RTX;
29966 if (!is_weak)
29968 label1 = gen_label_rtx ();
29969 emit_label (label1);
29971 label2 = gen_label_rtx ();
29973 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
29975 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
29976 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29977 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29978 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
29979 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
29981 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
29983 /* Weak or strong, we want EQ to be true for success, so that we
29984 match the flags that we got from the compare above. */
29985 cond = gen_rtx_REG (CCmode, CC_REGNUM);
29986 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
29987 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
29989 if (!is_weak)
29991 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29992 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29993 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
29994 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
29997 if (mod_f != MEMMODEL_RELAXED)
29998 emit_label (label2);
30000 /* Checks whether a barrier is needed and emits one accordingly. */
30001 if (!(use_acquire || use_release))
30002 arm_post_atomic_barrier (mod_s);
30004 if (mod_f == MEMMODEL_RELAXED)
30005 emit_label (label2);
30008 void
30009 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30010 rtx value, rtx model_rtx, rtx cond)
30012 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30013 enum machine_mode mode = GET_MODE (mem);
30014 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30015 rtx label, x;
30017 bool use_acquire = TARGET_HAVE_LDACQ
30018 && !(model == MEMMODEL_RELAXED
30019 || model == MEMMODEL_CONSUME
30020 || model == MEMMODEL_RELEASE);
30022 bool use_release = TARGET_HAVE_LDACQ
30023 && !(model == MEMMODEL_RELAXED
30024 || model == MEMMODEL_CONSUME
30025 || model == MEMMODEL_ACQUIRE);
30027 /* Checks whether a barrier is needed and emits one accordingly. */
30028 if (!(use_acquire || use_release))
30029 arm_pre_atomic_barrier (model);
30031 label = gen_label_rtx ();
30032 emit_label (label);
30034 if (new_out)
30035 new_out = gen_lowpart (wmode, new_out);
30036 if (old_out)
30037 old_out = gen_lowpart (wmode, old_out);
30038 else
30039 old_out = new_out;
30040 value = simplify_gen_subreg (wmode, value, mode, 0);
30042 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30044 switch (code)
30046 case SET:
30047 new_out = value;
30048 break;
30050 case NOT:
30051 x = gen_rtx_AND (wmode, old_out, value);
30052 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30053 x = gen_rtx_NOT (wmode, new_out);
30054 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30055 break;
30057 case MINUS:
30058 if (CONST_INT_P (value))
30060 value = GEN_INT (-INTVAL (value));
30061 code = PLUS;
30063 /* FALLTHRU */
30065 case PLUS:
30066 if (mode == DImode)
30068 /* DImode plus/minus need to clobber flags. */
30069 /* The adddi3 and subdi3 patterns are incorrectly written so that
30070 they require matching operands, even when we could easily support
30071 three operands. Thankfully, this can be fixed up post-splitting,
30072 as the individual add+adc patterns do accept three operands and
30073 post-reload cprop can make these moves go away. */
30074 emit_move_insn (new_out, old_out);
30075 if (code == PLUS)
30076 x = gen_adddi3 (new_out, new_out, value);
30077 else
30078 x = gen_subdi3 (new_out, new_out, value);
30079 emit_insn (x);
30080 break;
30082 /* FALLTHRU */
30084 default:
30085 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30086 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30087 break;
30090 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30091 use_release);
30093 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30094 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30096 /* Checks whether a barrier is needed and emits one accordingly. */
30097 if (!(use_acquire || use_release))
30098 arm_post_atomic_barrier (model);
30101 #define MAX_VECT_LEN 16
30103 struct expand_vec_perm_d
30105 rtx target, op0, op1;
30106 unsigned char perm[MAX_VECT_LEN];
30107 enum machine_mode vmode;
30108 unsigned char nelt;
30109 bool one_vector_p;
30110 bool testing_p;
30113 /* Generate a variable permutation. */
30115 static void
30116 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30118 enum machine_mode vmode = GET_MODE (target);
30119 bool one_vector_p = rtx_equal_p (op0, op1);
30121 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30122 gcc_checking_assert (GET_MODE (op0) == vmode);
30123 gcc_checking_assert (GET_MODE (op1) == vmode);
30124 gcc_checking_assert (GET_MODE (sel) == vmode);
30125 gcc_checking_assert (TARGET_NEON);
30127 if (one_vector_p)
30129 if (vmode == V8QImode)
30130 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30131 else
30132 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30134 else
30136 rtx pair;
30138 if (vmode == V8QImode)
30140 pair = gen_reg_rtx (V16QImode);
30141 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30142 pair = gen_lowpart (TImode, pair);
30143 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30145 else
30147 pair = gen_reg_rtx (OImode);
30148 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30149 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30154 void
30155 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30157 enum machine_mode vmode = GET_MODE (target);
30158 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30159 bool one_vector_p = rtx_equal_p (op0, op1);
30160 rtx rmask[MAX_VECT_LEN], mask;
30162 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30163 numbering of elements for big-endian, we must reverse the order. */
30164 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30166 /* The VTBL instruction does not use a modulo index, so we must take care
30167 of that ourselves. */
30168 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30169 for (i = 0; i < nelt; ++i)
30170 rmask[i] = mask;
30171 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30172 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30174 arm_expand_vec_perm_1 (target, op0, op1, sel);
30177 /* Generate or test for an insn that supports a constant permutation. */
30179 /* Recognize patterns for the VUZP insns. */
30181 static bool
30182 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30184 unsigned int i, odd, mask, nelt = d->nelt;
30185 rtx out0, out1, in0, in1, x;
30186 rtx (*gen)(rtx, rtx, rtx, rtx);
30188 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30189 return false;
30191 /* Note that these are little-endian tests. Adjust for big-endian later. */
30192 if (d->perm[0] == 0)
30193 odd = 0;
30194 else if (d->perm[0] == 1)
30195 odd = 1;
30196 else
30197 return false;
30198 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30200 for (i = 0; i < nelt; i++)
30202 unsigned elt = (i * 2 + odd) & mask;
30203 if (d->perm[i] != elt)
30204 return false;
30207 /* Success! */
30208 if (d->testing_p)
30209 return true;
30211 switch (d->vmode)
30213 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30214 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30215 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30216 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30217 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30218 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30219 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30220 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30221 default:
30222 gcc_unreachable ();
30225 in0 = d->op0;
30226 in1 = d->op1;
30227 if (BYTES_BIG_ENDIAN)
30229 x = in0, in0 = in1, in1 = x;
30230 odd = !odd;
30233 out0 = d->target;
30234 out1 = gen_reg_rtx (d->vmode);
30235 if (odd)
30236 x = out0, out0 = out1, out1 = x;
30238 emit_insn (gen (out0, in0, in1, out1));
30239 return true;
30242 /* Recognize patterns for the VZIP insns. */
30244 static bool
30245 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30247 unsigned int i, high, mask, nelt = d->nelt;
30248 rtx out0, out1, in0, in1, x;
30249 rtx (*gen)(rtx, rtx, rtx, rtx);
30251 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30252 return false;
30254 /* Note that these are little-endian tests. Adjust for big-endian later. */
30255 high = nelt / 2;
30256 if (d->perm[0] == high)
30258 else if (d->perm[0] == 0)
30259 high = 0;
30260 else
30261 return false;
30262 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30264 for (i = 0; i < nelt / 2; i++)
30266 unsigned elt = (i + high) & mask;
30267 if (d->perm[i * 2] != elt)
30268 return false;
30269 elt = (elt + nelt) & mask;
30270 if (d->perm[i * 2 + 1] != elt)
30271 return false;
30274 /* Success! */
30275 if (d->testing_p)
30276 return true;
30278 switch (d->vmode)
30280 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30281 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30282 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30283 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30284 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30285 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30286 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30287 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30288 default:
30289 gcc_unreachable ();
30292 in0 = d->op0;
30293 in1 = d->op1;
30294 if (BYTES_BIG_ENDIAN)
30296 x = in0, in0 = in1, in1 = x;
30297 high = !high;
30300 out0 = d->target;
30301 out1 = gen_reg_rtx (d->vmode);
30302 if (high)
30303 x = out0, out0 = out1, out1 = x;
30305 emit_insn (gen (out0, in0, in1, out1));
30306 return true;
30309 /* Recognize patterns for the VREV insns. */
30311 static bool
30312 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30314 unsigned int i, j, diff, nelt = d->nelt;
30315 rtx (*gen)(rtx, rtx, rtx);
30317 if (!d->one_vector_p)
30318 return false;
30320 diff = d->perm[0];
30321 switch (diff)
30323 case 7:
30324 switch (d->vmode)
30326 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30327 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30328 default:
30329 return false;
30331 break;
30332 case 3:
30333 switch (d->vmode)
30335 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30336 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30337 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30338 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30339 default:
30340 return false;
30342 break;
30343 case 1:
30344 switch (d->vmode)
30346 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30347 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30348 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30349 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30350 case V4SImode: gen = gen_neon_vrev64v4si; break;
30351 case V2SImode: gen = gen_neon_vrev64v2si; break;
30352 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30353 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30354 default:
30355 return false;
30357 break;
30358 default:
30359 return false;
30362 for (i = 0; i < nelt ; i += diff + 1)
30363 for (j = 0; j <= diff; j += 1)
30365 /* This is guaranteed to be true, as the value of diff
30366 is 7, 3 or 1 and we should have enough elements in the
30367 queue to generate this. Getting a vector mask with a
30368 diff value other than these implies that something
30369 has gone wrong by the time we get here. */
30370 gcc_assert (i + j < nelt);
30371 if (d->perm[i + j] != i + diff - j)
30372 return false;
30375 /* Success! */
30376 if (d->testing_p)
30377 return true;
30379 /* ??? The third operand is an artifact of the builtin infrastructure
30380 and is ignored by the actual instruction. */
30381 emit_insn (gen (d->target, d->op0, const0_rtx));
30382 return true;
30385 /* Recognize patterns for the VTRN insns. */
30387 static bool
30388 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30390 unsigned int i, odd, mask, nelt = d->nelt;
30391 rtx out0, out1, in0, in1, x;
30392 rtx (*gen)(rtx, rtx, rtx, rtx);
30394 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30395 return false;
30397 /* Note that these are little-endian tests. Adjust for big-endian later. */
30398 if (d->perm[0] == 0)
30399 odd = 0;
30400 else if (d->perm[0] == 1)
30401 odd = 1;
30402 else
30403 return false;
30404 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30406 for (i = 0; i < nelt; i += 2)
30408 if (d->perm[i] != i + odd)
30409 return false;
30410 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30411 return false;
30414 /* Success! */
30415 if (d->testing_p)
30416 return true;
30418 switch (d->vmode)
30420 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30421 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30422 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30423 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30424 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30425 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30426 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30427 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30428 default:
30429 gcc_unreachable ();
30432 in0 = d->op0;
30433 in1 = d->op1;
30434 if (BYTES_BIG_ENDIAN)
30436 x = in0, in0 = in1, in1 = x;
30437 odd = !odd;
30440 out0 = d->target;
30441 out1 = gen_reg_rtx (d->vmode);
30442 if (odd)
30443 x = out0, out0 = out1, out1 = x;
30445 emit_insn (gen (out0, in0, in1, out1));
30446 return true;
30449 /* Recognize patterns for the VEXT insns. */
30451 static bool
30452 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30454 unsigned int i, nelt = d->nelt;
30455 rtx (*gen) (rtx, rtx, rtx, rtx);
30456 rtx offset;
30458 unsigned int location;
30460 unsigned int next = d->perm[0] + 1;
30462 /* TODO: Handle GCC's numbering of elements for big-endian. */
30463 if (BYTES_BIG_ENDIAN)
30464 return false;
30466 /* Check if the extracted indexes are increasing by one. */
30467 for (i = 1; i < nelt; next++, i++)
30469 /* If we hit the most significant element of the 2nd vector in
30470 the previous iteration, no need to test further. */
30471 if (next == 2 * nelt)
30472 return false;
30474 /* If we are operating on only one vector, it could be a
30475 rotation. If there are only two elements of size < 64, let
30476 arm_evpc_neon_vrev catch it. */
30477 if (d->one_vector_p && (next == nelt))
30479 if ((nelt == 2) && (d->vmode != V2DImode))
30480 return false;
30481 else
30482 next = 0;
30485 if (d->perm[i] != next)
30486 return false;
30489 location = d->perm[0];
30491 switch (d->vmode)
30493 case V16QImode: gen = gen_neon_vextv16qi; break;
30494 case V8QImode: gen = gen_neon_vextv8qi; break;
30495 case V4HImode: gen = gen_neon_vextv4hi; break;
30496 case V8HImode: gen = gen_neon_vextv8hi; break;
30497 case V2SImode: gen = gen_neon_vextv2si; break;
30498 case V4SImode: gen = gen_neon_vextv4si; break;
30499 case V2SFmode: gen = gen_neon_vextv2sf; break;
30500 case V4SFmode: gen = gen_neon_vextv4sf; break;
30501 case V2DImode: gen = gen_neon_vextv2di; break;
30502 default:
30503 return false;
30506 /* Success! */
30507 if (d->testing_p)
30508 return true;
30510 offset = GEN_INT (location);
30511 emit_insn (gen (d->target, d->op0, d->op1, offset));
30512 return true;
30515 /* The NEON VTBL instruction is a fully variable permutation that's even
30516 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30517 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30518 can do slightly better by expanding this as a constant where we don't
30519 have to apply a mask. */
30521 static bool
30522 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30524 rtx rperm[MAX_VECT_LEN], sel;
30525 enum machine_mode vmode = d->vmode;
30526 unsigned int i, nelt = d->nelt;
30528 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30529 numbering of elements for big-endian, we must reverse the order. */
30530 if (BYTES_BIG_ENDIAN)
30531 return false;
30533 if (d->testing_p)
30534 return true;
30536 /* Generic code will try constant permutation twice: once with the
30537 original mode and again with the elements lowered to QImode.
30538 So wait and don't do the selector expansion ourselves. */
30539 if (vmode != V8QImode && vmode != V16QImode)
30540 return false;
30542 for (i = 0; i < nelt; ++i)
30543 rperm[i] = GEN_INT (d->perm[i]);
30544 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30545 sel = force_reg (vmode, sel);
30547 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30548 return true;
30551 static bool
30552 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30554 /* Check if the input mask matches vext before reordering the
30555 operands. */
30556 if (TARGET_NEON)
30557 if (arm_evpc_neon_vext (d))
30558 return true;
30560 /* The pattern matching functions above are written to look for a small
30561 number to begin the sequence (0, 1, N/2). If we begin with an index
30562 from the second operand, we can swap the operands. */
30563 if (d->perm[0] >= d->nelt)
30565 unsigned i, nelt = d->nelt;
30566 rtx x;
30568 for (i = 0; i < nelt; ++i)
30569 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
30571 x = d->op0;
30572 d->op0 = d->op1;
30573 d->op1 = x;
30576 if (TARGET_NEON)
30578 if (arm_evpc_neon_vuzp (d))
30579 return true;
30580 if (arm_evpc_neon_vzip (d))
30581 return true;
30582 if (arm_evpc_neon_vrev (d))
30583 return true;
30584 if (arm_evpc_neon_vtrn (d))
30585 return true;
30586 return arm_evpc_neon_vtbl (d);
30588 return false;
30591 /* Expand a vec_perm_const pattern. */
30593 bool
30594 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
30596 struct expand_vec_perm_d d;
30597 int i, nelt, which;
30599 d.target = target;
30600 d.op0 = op0;
30601 d.op1 = op1;
30603 d.vmode = GET_MODE (target);
30604 gcc_assert (VECTOR_MODE_P (d.vmode));
30605 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30606 d.testing_p = false;
30608 for (i = which = 0; i < nelt; ++i)
30610 rtx e = XVECEXP (sel, 0, i);
30611 int ei = INTVAL (e) & (2 * nelt - 1);
30612 which |= (ei < nelt ? 1 : 2);
30613 d.perm[i] = ei;
30616 switch (which)
30618 default:
30619 gcc_unreachable();
30621 case 3:
30622 d.one_vector_p = false;
30623 if (!rtx_equal_p (op0, op1))
30624 break;
30626 /* The elements of PERM do not suggest that only the first operand
30627 is used, but both operands are identical. Allow easier matching
30628 of the permutation by folding the permutation into the single
30629 input vector. */
30630 /* FALLTHRU */
30631 case 2:
30632 for (i = 0; i < nelt; ++i)
30633 d.perm[i] &= nelt - 1;
30634 d.op0 = op1;
30635 d.one_vector_p = true;
30636 break;
30638 case 1:
30639 d.op1 = op0;
30640 d.one_vector_p = true;
30641 break;
30644 return arm_expand_vec_perm_const_1 (&d);
30647 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30649 static bool
30650 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
30651 const unsigned char *sel)
30653 struct expand_vec_perm_d d;
30654 unsigned int i, nelt, which;
30655 bool ret;
30657 d.vmode = vmode;
30658 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30659 d.testing_p = true;
30660 memcpy (d.perm, sel, nelt);
30662 /* Categorize the set of elements in the selector. */
30663 for (i = which = 0; i < nelt; ++i)
30665 unsigned char e = d.perm[i];
30666 gcc_assert (e < 2 * nelt);
30667 which |= (e < nelt ? 1 : 2);
30670 /* For all elements from second vector, fold the elements to first. */
30671 if (which == 2)
30672 for (i = 0; i < nelt; ++i)
30673 d.perm[i] -= nelt;
30675 /* Check whether the mask can be applied to the vector type. */
30676 d.one_vector_p = (which != 3);
30678 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30679 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
30680 if (!d.one_vector_p)
30681 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
30683 start_sequence ();
30684 ret = arm_expand_vec_perm_const_1 (&d);
30685 end_sequence ();
30687 return ret;
30690 bool
30691 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
30693 /* If we are soft float and either have ldrd or the mode fits in a
30694 single word, then all auto-increment forms are OK. */
30695 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
30696 return true;
30698 switch (code)
30700 /* Post increment and Pre Decrement are supported for all
30701 instruction forms except for vector forms. */
30702 case ARM_POST_INC:
30703 case ARM_PRE_DEC:
30704 if (VECTOR_MODE_P (mode))
30706 if (code != ARM_PRE_DEC)
30707 return true;
30708 else
30709 return false;
30712 return true;
30714 case ARM_POST_DEC:
30715 case ARM_PRE_INC:
30716 /* Without LDRD, and with a mode size greater than
30717 the word size, there is no point in auto-incrementing
30718 because ldm and stm do not have these forms. */
30719 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
30720 return false;
30722 /* Vector and floating point modes do not support
30723 these auto increment forms. */
30724 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
30725 return false;
30727 return true;
30729 default:
30730 return false;
30734 return false;
30737 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
30738 on ARM, since we know that shifts by negative amounts are no-ops.
30739 Additionally, the default expansion code is not available or suitable
30740 for post-reload insn splits (this can occur when the register allocator
30741 chooses not to do a shift in NEON).
30743 This function is used in both initial expand and post-reload splits, and
30744 handles all kinds of 64-bit shifts.
30746 Input requirements:
30747 - It is safe for the input and output to be the same register, but
30748 early-clobber rules apply for the shift amount and scratch registers.
30749 - Shift by register requires both scratch registers. In all other cases
30750 the scratch registers may be NULL.
30751 - Ashiftrt by a register also clobbers the CC register. */
30752 void
30753 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
30754 rtx amount, rtx scratch1, rtx scratch2)
30756 rtx out_high = gen_highpart (SImode, out);
30757 rtx out_low = gen_lowpart (SImode, out);
30758 rtx in_high = gen_highpart (SImode, in);
30759 rtx in_low = gen_lowpart (SImode, in);
30761 /* Terminology:
30762 in = the register pair containing the input value.
30763 out = the destination register pair.
30764 up = the high- or low-part of each pair.
30765 down = the opposite part to "up".
30766 In a shift, we can consider bits to shift from "up"-stream to
30767 "down"-stream, so in a left-shift "up" is the low-part and "down"
30768 is the high-part of each register pair. */
30770 rtx out_up = code == ASHIFT ? out_low : out_high;
30771 rtx out_down = code == ASHIFT ? out_high : out_low;
30772 rtx in_up = code == ASHIFT ? in_low : in_high;
30773 rtx in_down = code == ASHIFT ? in_high : in_low;
30775 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
30776 gcc_assert (out
30777 && (REG_P (out) || GET_CODE (out) == SUBREG)
30778 && GET_MODE (out) == DImode);
30779 gcc_assert (in
30780 && (REG_P (in) || GET_CODE (in) == SUBREG)
30781 && GET_MODE (in) == DImode);
30782 gcc_assert (amount
30783 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
30784 && GET_MODE (amount) == SImode)
30785 || CONST_INT_P (amount)));
30786 gcc_assert (scratch1 == NULL
30787 || (GET_CODE (scratch1) == SCRATCH)
30788 || (GET_MODE (scratch1) == SImode
30789 && REG_P (scratch1)));
30790 gcc_assert (scratch2 == NULL
30791 || (GET_CODE (scratch2) == SCRATCH)
30792 || (GET_MODE (scratch2) == SImode
30793 && REG_P (scratch2)));
30794 gcc_assert (!REG_P (out) || !REG_P (amount)
30795 || !HARD_REGISTER_P (out)
30796 || (REGNO (out) != REGNO (amount)
30797 && REGNO (out) + 1 != REGNO (amount)));
30799 /* Macros to make following code more readable. */
30800 #define SUB_32(DEST,SRC) \
30801 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30802 #define RSB_32(DEST,SRC) \
30803 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30804 #define SUB_S_32(DEST,SRC) \
30805 gen_addsi3_compare0 ((DEST), (SRC), \
30806 GEN_INT (-32))
30807 #define SET(DEST,SRC) \
30808 gen_rtx_SET (SImode, (DEST), (SRC))
30809 #define SHIFT(CODE,SRC,AMOUNT) \
30810 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30811 #define LSHIFT(CODE,SRC,AMOUNT) \
30812 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30813 SImode, (SRC), (AMOUNT))
30814 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30815 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30816 SImode, (SRC), (AMOUNT))
30817 #define ORR(A,B) \
30818 gen_rtx_IOR (SImode, (A), (B))
30819 #define BRANCH(COND,LABEL) \
30820 gen_arm_cond_branch ((LABEL), \
30821 gen_rtx_ ## COND (CCmode, cc_reg, \
30822 const0_rtx), \
30823 cc_reg)
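/* Note: SUB_32 computes SRC - 32, RSB_32 computes 32 - SRC, and SUB_S_32
computes SRC - 32 while also setting the condition codes. LSHIFT shifts
in the same direction as the overall 64-bit shift (LSL for left shifts,
LSR otherwise); REV_LSHIFT shifts the opposite way. */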
30825 /* Shifts by register and shifts by constant are handled separately. */
30826 if (CONST_INT_P (amount))
30828 /* We have a shift-by-constant. */
30830 /* First, handle out-of-range shift amounts.
30831 In both cases we try to match the result an ARM instruction in a
30832 shift-by-register would give. This helps reduce execution
30833 differences between optimization levels, but it won't stop other
30834 parts of the compiler doing different things. This is "undefined
30835 behaviour", in any case. */
30836 if (INTVAL (amount) <= 0)
30837 emit_insn (gen_movdi (out, in));
30838 else if (INTVAL (amount) >= 64)
30840 if (code == ASHIFTRT)
30842 rtx const31_rtx = GEN_INT (31);
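/* A shift right by 64 or more leaves only copies of the sign bit, so
shifting IN_UP right by 31 and storing it into both output words gives
the expected all-zeros or all-ones result. */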
30843 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
30844 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
30846 else
30847 emit_insn (gen_movdi (out, const0_rtx));
30850 /* Now handle valid shifts. */
30851 else if (INTVAL (amount) < 32)
30853 /* Shifts by a constant less than 32. */
30854 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
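/* E.g. a logical right shift by 4 produces
out_low = (in_low >> 4) | (in_high << 28) and out_high = in_high >> 4. */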
30856 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30857 emit_insn (SET (out_down,
30858 ORR (REV_LSHIFT (code, in_up, reverse_amount),
30859 out_down)));
30860 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30862 else
30864 /* Shifts by a constant greater than 31. */
30865 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
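/* E.g. a logical right shift by 36 produces out_low = in_high >> 4 and
out_high = 0; an arithmetic right shift sets out_high to in_high >> 31
instead. */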
30867 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
30868 if (code == ASHIFTRT)
30869 emit_insn (gen_ashrsi3 (out_up, in_up,
30870 GEN_INT (31)));
30871 else
30872 emit_insn (SET (out_up, const0_rtx));
30875 else
30877 /* We have a shift-by-register. */
30878 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
30880 /* This alternative requires the scratch registers. */
30881 gcc_assert (scratch1 && REG_P (scratch1));
30882 gcc_assert (scratch2 && REG_P (scratch2));
30884 /* We will need the values "amount-32" and "32-amount" later.
30885 Swapping them around now allows the later code to be more general. */
30886 switch (code)
30888 case ASHIFT:
30889 emit_insn (SUB_32 (scratch1, amount));
30890 emit_insn (RSB_32 (scratch2, amount));
30891 break;
30892 case ASHIFTRT:
30893 emit_insn (RSB_32 (scratch1, amount));
30894 /* Also set CC = amount > 32. */
30895 emit_insn (SUB_S_32 (scratch2, amount));
30896 break;
30897 case LSHIFTRT:
30898 emit_insn (RSB_32 (scratch1, amount));
30899 emit_insn (SUB_32 (scratch2, amount));
30900 break;
30901 default:
30902 gcc_unreachable ();
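/* At this point scratch1 holds the count by which IN_UP must be shifted
left and scratch2 the count by which it must be shifted right when
forming OUT_DOWN below. */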
30905 /* Emit code like this:
30907 arithmetic-left:
30908 out_down = in_down << amount;
30909 out_down = (in_up << (amount - 32)) | out_down;
30910 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30911 out_up = in_up << amount;
30913 arithmetic-right:
30914 out_down = in_down >> amount;
30915 out_down = (in_up << (32 - amount)) | out_down;
30916 if (amount >= 32)
30917 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30918 out_up = in_up >> amount;
30920 logical-right:
30921 out_down = in_down >> amount;
30922 out_down = (in_up << (32 - amount)) | out_down;
30923 if (amount >= 32)
30924 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30925 out_up = in_up >> amount;
30927 The ARM and Thumb2 variants are the same but implemented slightly
30928 differently. If this were only called during expand we could just
30929 use the Thumb2 case and let combine do the right thing, but this
30930 can also be called from post-reload splitters. */
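/* These sequences rely on ARM register-controlled shift semantics: only the
bottom byte of the amount register is used, LSL/LSR by 32 or more give
zero, and ASR by 32 or more replicates the sign bit. Out-of-range terms
therefore vanish by themselves, except for the final ASR term in the
arithmetic-right case, which is why that one is guarded by a conditional
branch and skipped when the amount is less than 32. */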
30932 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30934 if (!TARGET_THUMB2)
30936 /* Emit code for ARM mode. */
30937 emit_insn (SET (out_down,
30938 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
30939 if (code == ASHIFTRT)
30941 rtx done_label = gen_label_rtx ();
30942 emit_jump_insn (BRANCH (LT, done_label));
30943 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
30944 out_down)));
30945 emit_label (done_label);
30947 else
30948 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
30949 out_down)));
30951 else
30953 /* Emit code for Thumb2 mode.
30954 Thumb2 can't do shift and or in one insn. */
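/* (ARM mode can fold the register-controlled shift into the second operand
of ORR, e.g. "orr out, out, in, lsl rN"; Thumb-2 has no such form, so
the shifted value goes through a scratch register first.) */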
30955 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
30956 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
30958 if (code == ASHIFTRT)
30960 rtx done_label = gen_label_rtx ();
30961 emit_jump_insn (BRANCH (LT, done_label));
30962 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
30963 emit_insn (SET (out_down, ORR (out_down, scratch2)));
30964 emit_label (done_label);
30966 else
30968 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
30969 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
30973 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30976 #undef SUB_32
30977 #undef RSB_32
30978 #undef SUB_S_32
30979 #undef SET
30980 #undef SHIFT
30981 #undef LSHIFT
30982 #undef REV_LSHIFT
30983 #undef ORR
30984 #undef BRANCH
30988 /* Returns true if *COMPARISON is a valid comparison operation, and puts
30989 the operands into a form that is valid for it. */
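/* Note: UNEQ and LTGT comparisons and modes other than SImode, DImode,
SFmode and DFmode are rejected; otherwise the comparison code is
canonicalized by arm_canonicalize_comparison and non-conforming operands
are forced into registers. */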
30990 bool
30991 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
30993 enum rtx_code code = GET_CODE (*comparison);
30994 int code_int;
30995 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
30996 ? GET_MODE (*op2) : GET_MODE (*op1);
30998 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31000 if (code == UNEQ || code == LTGT)
31001 return false;
31003 code_int = (int)code;
31004 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31005 PUT_CODE (*comparison, (enum rtx_code)code_int);
31007 switch (mode)
31009 case SImode:
31010 if (!arm_add_operand (*op1, mode))
31011 *op1 = force_reg (mode, *op1);
31012 if (!arm_add_operand (*op2, mode))
31013 *op2 = force_reg (mode, *op2);
31014 return true;
31016 case DImode:
31017 if (!cmpdi_operand (*op1, mode))
31018 *op1 = force_reg (mode, *op1);
31019 if (!cmpdi_operand (*op2, mode))
31020 *op2 = force_reg (mode, *op2);
31021 return true;
31023 case SFmode:
31024 case DFmode:
31025 if (!arm_float_compare_operand (*op1, mode))
31026 *op1 = force_reg (mode, *op1);
31027 if (!arm_float_compare_operand (*op2, mode))
31028 *op2 = force_reg (mode, *op2);
31029 return true;
31030 default:
31031 break;
31034 return false;
31038 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
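/* AddressSanitizer maps each 8-byte chunk of memory to one shadow byte
located at (address >> 3) + this offset, so the ARM shadow region starts
at 1 << 29 (0x20000000). */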
31040 static unsigned HOST_WIDE_INT
31041 arm_asan_shadow_offset (void)
31043 return (unsigned HOST_WIDE_INT) 1 << 29;
31046 #include "gt-arm.h"