1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2013 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
63 /* Forward definitions of types. */
64 typedef struct minipool_node Mnode;
65 typedef struct minipool_fixup Mfix;
67 void (*arm_lang_output_object_attributes_hook)(void);
69 struct four_ints
71 int i[4];
74 /* Forward function declarations. */
75 static bool arm_lra_p (void);
76 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
77 static int arm_compute_static_chain_stack_bytes (void);
78 static arm_stack_offsets *arm_get_frame_offsets (void);
79 static void arm_add_gc_roots (void);
80 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
81 HOST_WIDE_INT, rtx, rtx, int, int);
82 static unsigned bit_count (unsigned long);
83 static int arm_address_register_rtx_p (rtx, int);
84 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
85 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
86 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
87 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
88 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
89 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
90 inline static int thumb1_index_register_rtx_p (rtx, int);
91 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
92 static int thumb_far_jump_used_p (void);
93 static bool thumb_force_lr_save (void);
94 static unsigned arm_size_return_regs (void);
95 static bool arm_assemble_integer (rtx, unsigned int, int);
96 static void arm_print_operand (FILE *, rtx, int);
97 static void arm_print_operand_address (FILE *, rtx);
98 static bool arm_print_operand_punct_valid_p (unsigned char code);
99 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
100 static arm_cc get_arm_condition_code (rtx);
101 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
102 static const char *output_multi_immediate (rtx *, const char *, const char *,
103 int, HOST_WIDE_INT);
104 static const char *shift_op (rtx, HOST_WIDE_INT *);
105 static struct machine_function *arm_init_machine_status (void);
106 static void thumb_exit (FILE *, int);
107 static HOST_WIDE_INT get_jump_table_size (rtx);
108 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_forward_ref (Mfix *);
110 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
111 static Mnode *add_minipool_backward_ref (Mfix *);
112 static void assign_minipool_offsets (Mfix *);
113 static void arm_print_value (FILE *, rtx);
114 static void dump_minipool (rtx);
115 static int arm_barrier_cost (rtx);
116 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
117 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
118 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
119 rtx);
120 static void arm_reorg (void);
121 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
122 static unsigned long arm_compute_save_reg0_reg12_mask (void);
123 static unsigned long arm_compute_save_reg_mask (void);
124 static unsigned long arm_isr_value (tree);
125 static unsigned long arm_compute_func_type (void);
126 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
127 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
128 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
129 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
130 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
131 #endif
132 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
133 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
134 static int arm_comp_type_attributes (const_tree, const_tree);
135 static void arm_set_default_type_attributes (tree);
136 static int arm_adjust_cost (rtx, rtx, rtx, int);
137 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
138 static int optimal_immediate_sequence (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence);
141 static int optimal_immediate_sequence_1 (enum rtx_code code,
142 unsigned HOST_WIDE_INT val,
143 struct four_ints *return_sequence,
144 int i);
145 static int arm_get_strip_length (int);
146 static bool arm_function_ok_for_sibcall (tree, tree);
147 static enum machine_mode arm_promote_function_mode (const_tree,
148 enum machine_mode, int *,
149 const_tree, int);
150 static bool arm_return_in_memory (const_tree, const_tree);
151 static rtx arm_function_value (const_tree, const_tree, bool);
152 static rtx arm_libcall_value_1 (enum machine_mode);
153 static rtx arm_libcall_value (enum machine_mode, const_rtx);
154 static bool arm_function_value_regno_p (const unsigned int);
155 static void arm_internal_label (FILE *, const char *, unsigned long);
156 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
157 tree);
158 static bool arm_have_conditional_execution (void);
159 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
160 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
161 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
162 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
163 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
165 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
166 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
167 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
168 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
169 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
170 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
171 static void arm_init_builtins (void);
172 static void arm_init_iwmmxt_builtins (void);
173 static rtx safe_vector_operand (rtx, enum machine_mode);
174 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
175 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
176 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
177 static tree arm_builtin_decl (unsigned, bool);
178 static void emit_constant_insn (rtx cond, rtx pattern);
179 static rtx emit_set_insn (rtx, rtx);
180 static rtx emit_multi_reg_push (unsigned long);
181 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
182 tree, bool);
183 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
184 const_tree, bool);
185 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
186 const_tree, bool);
187 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
188 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
189 const_tree);
190 static rtx aapcs_libcall_value (enum machine_mode);
191 static int aapcs_select_return_coproc (const_tree, const_tree);
193 #ifdef OBJECT_FORMAT_ELF
194 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
195 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
196 #endif
197 #ifndef ARM_PE
198 static void arm_encode_section_info (tree, rtx, int);
199 #endif
201 static void arm_file_end (void);
202 static void arm_file_start (void);
204 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 enum machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 static void arm_asm_init_sections (void);
219 #endif
220 static rtx arm_dwarf_register_span (rtx);
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
237 static bool arm_cannot_copy_insn_p (rtx);
238 static bool arm_tls_symbol_p (rtx x);
239 static int arm_issue_rate (void);
240 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
241 static bool arm_output_addr_const_extra (FILE *, rtx);
242 static bool arm_allocate_stack_slots_for_args (void);
243 static bool arm_warn_func_return (tree);
244 static const char *arm_invalid_parameter_type (const_tree t);
245 static const char *arm_invalid_return_type (const_tree t);
246 static tree arm_promoted_type (const_tree t);
247 static tree arm_convert_to_type (tree type, tree expr);
248 static bool arm_scalar_mode_supported_p (enum machine_mode);
249 static bool arm_frame_pointer_required (void);
250 static bool arm_can_eliminate (const int, const int);
251 static void arm_asm_trampoline_template (FILE *);
252 static void arm_trampoline_init (rtx, tree, rtx);
253 static rtx arm_trampoline_adjust_address (rtx);
254 static rtx arm_pic_static_addr (rtx orig, rtx reg);
255 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
256 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
257 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
258 static bool arm_array_mode_supported_p (enum machine_mode,
259 unsigned HOST_WIDE_INT);
260 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
261 static bool arm_class_likely_spilled_p (reg_class_t);
262 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
263 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
264 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
265 const_tree type,
266 int misalignment,
267 bool is_packed);
268 static void arm_conditional_register_usage (void);
269 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
270 static unsigned int arm_autovectorize_vector_sizes (void);
271 static int arm_default_branch_cost (bool, bool);
272 static int arm_cortex_a5_branch_cost (bool, bool);
273 static int arm_cortex_m_branch_cost (bool, bool);
275 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
276 const unsigned char *sel);
278 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
279 tree vectype,
280 int misalign ATTRIBUTE_UNUSED);
281 static unsigned arm_add_stmt_cost (void *data, int count,
282 enum vect_cost_for_stmt kind,
283 struct _stmt_vec_info *stmt_info,
284 int misalign,
285 enum vect_cost_model_location where);
287 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
288 bool op0_preserve_value);
289 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
291 /* Table of machine attributes. */
292 static const struct attribute_spec arm_attribute_table[] =
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
295 affects_type_identity } */
296 /* Function calls made to this symbol must be done indirectly, because
297 it may lie outside the 26-bit addressing range of a normal function
298 call. */
299 { "long_call", 0, 0, false, true, true, NULL, false },
300 /* Whereas these functions are always known to reside within the 26-bit
301 addressing range. */
302 { "short_call", 0, 0, false, true, true, NULL, false },
303 /* Specify the procedure call conventions for a function. */
304 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
305 false },
306 /* Interrupt Service Routines have special prologue and epilogue requirements. */
307 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
308 false },
309 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
310 false },
311 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
312 false },
313 #ifdef ARM_PE
314 /* ARM/PE has three new attributes:
315 interfacearm - ?
316 dllexport - for exporting a function/variable that will live in a dll
317 dllimport - for importing a function/variable from a dll
319 Microsoft allows multiple declspecs in one __declspec, separating
320 them with spaces. We do NOT support this. Instead, use __declspec
321 multiple times. */
323 { "dllimport", 0, 0, true, false, false, NULL, false },
324 { "dllexport", 0, 0, true, false, false, NULL, false },
325 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
326 false },
327 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
328 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
329 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
330 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
331 false },
332 #endif
333 { NULL, 0, 0, false, false, false, NULL, false }
336 /* Initialize the GCC target structure. */
337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
338 #undef TARGET_MERGE_DECL_ATTRIBUTES
339 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
340 #endif
342 #undef TARGET_LEGITIMIZE_ADDRESS
343 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
345 #undef TARGET_LRA_P
346 #define TARGET_LRA_P arm_lra_p
348 #undef TARGET_ATTRIBUTE_TABLE
349 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
351 #undef TARGET_ASM_FILE_START
352 #define TARGET_ASM_FILE_START arm_file_start
353 #undef TARGET_ASM_FILE_END
354 #define TARGET_ASM_FILE_END arm_file_end
356 #undef TARGET_ASM_ALIGNED_SI_OP
357 #define TARGET_ASM_ALIGNED_SI_OP NULL
358 #undef TARGET_ASM_INTEGER
359 #define TARGET_ASM_INTEGER arm_assemble_integer
361 #undef TARGET_PRINT_OPERAND
362 #define TARGET_PRINT_OPERAND arm_print_operand
363 #undef TARGET_PRINT_OPERAND_ADDRESS
364 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
365 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
366 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
368 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
369 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
371 #undef TARGET_ASM_FUNCTION_PROLOGUE
372 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
374 #undef TARGET_ASM_FUNCTION_EPILOGUE
375 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
377 #undef TARGET_OPTION_OVERRIDE
378 #define TARGET_OPTION_OVERRIDE arm_option_override
380 #undef TARGET_COMP_TYPE_ATTRIBUTES
381 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
383 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
384 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
386 #undef TARGET_SCHED_ADJUST_COST
387 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
389 #undef TARGET_SCHED_REORDER
390 #define TARGET_SCHED_REORDER arm_sched_reorder
392 #undef TARGET_REGISTER_MOVE_COST
393 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
395 #undef TARGET_MEMORY_MOVE_COST
396 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
398 #undef TARGET_ENCODE_SECTION_INFO
399 #ifdef ARM_PE
400 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
401 #else
402 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
403 #endif
405 #undef TARGET_STRIP_NAME_ENCODING
406 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
408 #undef TARGET_ASM_INTERNAL_LABEL
409 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
412 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
414 #undef TARGET_FUNCTION_VALUE
415 #define TARGET_FUNCTION_VALUE arm_function_value
417 #undef TARGET_LIBCALL_VALUE
418 #define TARGET_LIBCALL_VALUE arm_libcall_value
420 #undef TARGET_FUNCTION_VALUE_REGNO_P
421 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
423 #undef TARGET_ASM_OUTPUT_MI_THUNK
424 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
425 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
426 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
428 #undef TARGET_RTX_COSTS
429 #define TARGET_RTX_COSTS arm_rtx_costs
430 #undef TARGET_ADDRESS_COST
431 #define TARGET_ADDRESS_COST arm_address_cost
433 #undef TARGET_SHIFT_TRUNCATION_MASK
434 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
435 #undef TARGET_VECTOR_MODE_SUPPORTED_P
436 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
437 #undef TARGET_ARRAY_MODE_SUPPORTED_P
438 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
439 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
440 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
441 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
442 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
443 arm_autovectorize_vector_sizes
445 #undef TARGET_MACHINE_DEPENDENT_REORG
446 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
448 #undef TARGET_INIT_BUILTINS
449 #define TARGET_INIT_BUILTINS arm_init_builtins
450 #undef TARGET_EXPAND_BUILTIN
451 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
452 #undef TARGET_BUILTIN_DECL
453 #define TARGET_BUILTIN_DECL arm_builtin_decl
455 #undef TARGET_INIT_LIBFUNCS
456 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
458 #undef TARGET_PROMOTE_FUNCTION_MODE
459 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
460 #undef TARGET_PROMOTE_PROTOTYPES
461 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
462 #undef TARGET_PASS_BY_REFERENCE
463 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
466 #undef TARGET_FUNCTION_ARG
467 #define TARGET_FUNCTION_ARG arm_function_arg
468 #undef TARGET_FUNCTION_ARG_ADVANCE
469 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
470 #undef TARGET_FUNCTION_ARG_BOUNDARY
471 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
476 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
477 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
479 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
480 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
481 #undef TARGET_TRAMPOLINE_INIT
482 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
483 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
484 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
486 #undef TARGET_WARN_FUNC_RETURN
487 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
489 #undef TARGET_DEFAULT_SHORT_ENUMS
490 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
492 #undef TARGET_ALIGN_ANON_BITFIELD
493 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
495 #undef TARGET_NARROW_VOLATILE_BITFIELD
496 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
498 #undef TARGET_CXX_GUARD_TYPE
499 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
501 #undef TARGET_CXX_GUARD_MASK_BIT
502 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
504 #undef TARGET_CXX_GET_COOKIE_SIZE
505 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
507 #undef TARGET_CXX_COOKIE_HAS_SIZE
508 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
510 #undef TARGET_CXX_CDTOR_RETURNS_THIS
511 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
513 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
514 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
516 #undef TARGET_CXX_USE_AEABI_ATEXIT
517 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
519 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
520 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
521 arm_cxx_determine_class_data_visibility
523 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
524 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
526 #undef TARGET_RETURN_IN_MSB
527 #define TARGET_RETURN_IN_MSB arm_return_in_msb
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
532 #undef TARGET_MUST_PASS_IN_STACK
533 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
535 #if ARM_UNWIND_INFO
536 #undef TARGET_ASM_UNWIND_EMIT
537 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
539 /* EABI unwinding tables use a different format for the typeinfo tables. */
540 #undef TARGET_ASM_TTYPE
541 #define TARGET_ASM_TTYPE arm_output_ttype
543 #undef TARGET_ARM_EABI_UNWINDER
544 #define TARGET_ARM_EABI_UNWINDER true
546 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
547 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
549 #undef TARGET_ASM_INIT_SECTIONS
550 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
551 #endif /* ARM_UNWIND_INFO */
553 #undef TARGET_DWARF_REGISTER_SPAN
554 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
556 #undef TARGET_CANNOT_COPY_INSN_P
557 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
559 #ifdef HAVE_AS_TLS
560 #undef TARGET_HAVE_TLS
561 #define TARGET_HAVE_TLS true
562 #endif
564 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
565 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
567 #undef TARGET_LEGITIMATE_CONSTANT_P
568 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
570 #undef TARGET_CANNOT_FORCE_CONST_MEM
571 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
573 #undef TARGET_MAX_ANCHOR_OFFSET
574 #define TARGET_MAX_ANCHOR_OFFSET 4095
576 /* The minimum is set such that the total size of the block
577 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
578 divisible by eight, ensuring natural spacing of anchors. */
579 #undef TARGET_MIN_ANCHOR_OFFSET
580 #define TARGET_MIN_ANCHOR_OFFSET -4088
582 #undef TARGET_SCHED_ISSUE_RATE
583 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
585 #undef TARGET_MANGLE_TYPE
586 #define TARGET_MANGLE_TYPE arm_mangle_type
588 #undef TARGET_BUILD_BUILTIN_VA_LIST
589 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
590 #undef TARGET_EXPAND_BUILTIN_VA_START
591 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
592 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
593 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
595 #ifdef HAVE_AS_TLS
596 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
597 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
598 #endif
600 #undef TARGET_LEGITIMATE_ADDRESS_P
601 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
603 #undef TARGET_PREFERRED_RELOAD_CLASS
604 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
606 #undef TARGET_INVALID_PARAMETER_TYPE
607 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
609 #undef TARGET_INVALID_RETURN_TYPE
610 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
612 #undef TARGET_PROMOTED_TYPE
613 #define TARGET_PROMOTED_TYPE arm_promoted_type
615 #undef TARGET_CONVERT_TO_TYPE
616 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
618 #undef TARGET_SCALAR_MODE_SUPPORTED_P
619 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
621 #undef TARGET_FRAME_POINTER_REQUIRED
622 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
624 #undef TARGET_CAN_ELIMINATE
625 #define TARGET_CAN_ELIMINATE arm_can_eliminate
627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
628 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
630 #undef TARGET_CLASS_LIKELY_SPILLED_P
631 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
633 #undef TARGET_VECTORIZE_BUILTINS
634 #define TARGET_VECTORIZE_BUILTINS
636 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
637 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
638 arm_builtin_vectorized_function
640 #undef TARGET_VECTOR_ALIGNMENT
641 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
643 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
644 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
645 arm_vector_alignment_reachable
647 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
648 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
649 arm_builtin_support_vector_misalignment
651 #undef TARGET_PREFERRED_RENAME_CLASS
652 #define TARGET_PREFERRED_RENAME_CLASS \
653 arm_preferred_rename_class
655 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
656 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
657 arm_vectorize_vec_perm_const_ok
659 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
660 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
661 arm_builtin_vectorization_cost
662 #undef TARGET_VECTORIZE_ADD_STMT_COST
663 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
665 #undef TARGET_CANONICALIZE_COMPARISON
666 #define TARGET_CANONICALIZE_COMPARISON \
667 arm_canonicalize_comparison
669 #undef TARGET_ASAN_SHADOW_OFFSET
670 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
672 #undef MAX_INSN_PER_IT_BLOCK
673 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
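/* arm_restrict_it corresponds to the ARMv8 restricted-IT rules, under which
   an IT block is expected to cover only a single conditional instruction;
   without that restriction Thumb-2 allows up to four insns per IT block.  */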
675 #undef TARGET_CAN_USE_DOLOOP_P
676 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
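/* TARGET_INITIALIZER (from target-def.h) expands at the point of use, so the
   TARGET_* macros redefined above supply the ARM-specific hooks in targetm;
   any hook not overridden here keeps its default implementation.  */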
678 struct gcc_target targetm = TARGET_INITIALIZER;
680 /* Obstack for minipool constant handling. */
681 static struct obstack minipool_obstack;
682 static char * minipool_startobj;
684 /* The maximum number of insns skipped which
685 will be conditionalised if possible. */
686 static int max_insns_skipped = 5;
688 extern FILE * asm_out_file;
690 /* True if we are currently building a constant table. */
691 int making_const_table;
693 /* The processor for which instructions should be scheduled. */
694 enum processor_type arm_tune = arm_none;
696 /* The current tuning set. */
697 const struct tune_params *current_tune;
699 /* Which floating point hardware to schedule for. */
700 int arm_fpu_attr;
702 /* Which floating point hardware to use. */
703 const struct arm_fpu_desc *arm_fpu_desc;
705 /* Used for Thumb call_via trampolines. */
706 rtx thumb_call_via_label[14];
707 static int thumb_call_reg_needed;
709 /* Bit values used to identify processor capabilities. */
710 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
711 #define FL_ARCH3M (1 << 1) /* Extended multiply */
712 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
713 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
714 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
715 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
716 #define FL_THUMB (1 << 6) /* Thumb aware */
717 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
718 #define FL_STRONG (1 << 8) /* StrongARM */
719 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
720 #define FL_XSCALE (1 << 10) /* XScale */
721 /* spare (1 << 11) */
722 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
723 media instructions. */
724 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
725 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
726 Note: ARM6 & 7 derivatives only. */
727 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
728 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
729 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
730 profile. */
731 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
732 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
733 #define FL_NEON (1 << 20) /* Neon instructions. */
734 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
735 architecture. */
736 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
737 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
738 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
740 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
741 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
743 /* Flags that only affect tuning, not the available instructions. */
744 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
745 | FL_CO_PROC)
747 #define FL_FOR_ARCH2 FL_NOTM
748 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
749 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
750 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
751 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
752 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
753 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
754 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
755 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
756 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
757 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
758 #define FL_FOR_ARCH6J FL_FOR_ARCH6
759 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
760 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
761 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
762 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
763 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
764 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
765 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
766 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
767 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
768 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
769 #define FL_FOR_ARCH8A (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
770 | FL_ARM_DIV | FL_NOTM)
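/* These sets are cumulative: each architecture builds on its predecessor.
   Expanding the chain above, for example, FL_FOR_ARCH7A works out to
   FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_THUMB | FL_ARCH5 | FL_ARCH5E
   | FL_ARCH6 | FL_ARCH6K | FL_THUMB2 | FL_ARCH7 | FL_NOTM.  */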
772 /* The bits in this mask specify which
773 instructions we are allowed to generate. */
774 static unsigned long insn_flags = 0;
776 /* The bits in this mask specify which instruction scheduling options should
777 be used. */
778 static unsigned long tune_flags = 0;
780 /* The highest ARM architecture version supported by the
781 target. */
782 enum base_architecture arm_base_arch = BASE_ARCH_0;
784 /* The following are used in the arm.md file as equivalents to bits
785 in the above two flag variables. */
787 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
788 int arm_arch3m = 0;
790 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
791 int arm_arch4 = 0;
793 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
794 int arm_arch4t = 0;
796 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
797 int arm_arch5 = 0;
799 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
800 int arm_arch5e = 0;
802 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
803 int arm_arch6 = 0;
805 /* Nonzero if this chip supports the ARM 6K extensions. */
806 int arm_arch6k = 0;
808 /* Nonzero if instructions present in ARMv6-M can be used. */
809 int arm_arch6m = 0;
811 /* Nonzero if this chip supports the ARM 7 extensions. */
812 int arm_arch7 = 0;
814 /* Nonzero if instructions not present in the 'M' profile can be used. */
815 int arm_arch_notm = 0;
817 /* Nonzero if instructions present in ARMv7E-M can be used. */
818 int arm_arch7em = 0;
820 /* Nonzero if instructions present in ARMv8 can be used. */
821 int arm_arch8 = 0;
823 /* Nonzero if this chip can benefit from load scheduling. */
824 int arm_ld_sched = 0;
826 /* Nonzero if this chip is a StrongARM. */
827 int arm_tune_strongarm = 0;
829 /* Nonzero if this chip supports Intel Wireless MMX technology. */
830 int arm_arch_iwmmxt = 0;
832 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
833 int arm_arch_iwmmxt2 = 0;
835 /* Nonzero if this chip is an XScale. */
836 int arm_arch_xscale = 0;
838 /* Nonzero if tuning for XScale. */
839 int arm_tune_xscale = 0;
841 /* Nonzero if we want to tune for stores that access the write-buffer.
842 This typically means an ARM6 or ARM7 with MMU or MPU. */
843 int arm_tune_wbuf = 0;
845 /* Nonzero if tuning for Cortex-A9. */
846 int arm_tune_cortex_a9 = 0;
848 /* Nonzero if generating Thumb instructions. */
849 int thumb_code = 0;
851 /* Nonzero if generating Thumb-1 instructions. */
852 int thumb1_code = 0;
854 /* Nonzero if we should define __THUMB_INTERWORK__ in the
855 preprocessor.
856 XXX This is a bit of a hack, it's intended to help work around
857 problems in GLD which doesn't understand that armv5t code is
858 interworking clean. */
859 int arm_cpp_interwork = 0;
861 /* Nonzero if chip supports Thumb 2. */
862 int arm_arch_thumb2;
864 /* Nonzero if chip supports integer division instruction. */
865 int arm_arch_arm_hwdiv;
866 int arm_arch_thumb_hwdiv;
868 /* Nonzero if we should use Neon to handle 64-bit operations rather
869 than core registers. */
870 int prefer_neon_for_64bits = 0;
872 /* Nonzero if we shouldn't use literal pools. */
873 bool arm_disable_literal_pool = false;
875 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
876 we must report the mode of the memory reference from
877 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
878 enum machine_mode output_memory_reference_mode;
880 /* The register number to be used for the PIC offset register. */
881 unsigned arm_pic_register = INVALID_REGNUM;
883 /* Set to 1 after arm_reorg has started. Reset to start at the start of
884 the next function. */
885 static int after_arm_reorg = 0;
887 enum arm_pcs arm_pcs_default;
889 /* For an explanation of these variables, see final_prescan_insn below. */
890 int arm_ccfsm_state;
891 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
892 enum arm_cond_code arm_current_cc;
894 rtx arm_target_insn;
895 int arm_target_label;
896 /* The number of conditionally executed insns, including the current insn. */
897 int arm_condexec_count = 0;
898 /* A bitmask specifying the patterns for the IT block.
899 Zero means do not output an IT block before this insn. */
900 int arm_condexec_mask = 0;
901 /* The number of bits used in arm_condexec_mask. */
902 int arm_condexec_masklen = 0;
904 /* The condition codes of the ARM, and the inverse function. */
905 static const char * const arm_condition_codes[] =
907 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
908 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
911 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
912 int arm_regs_in_sequence[] =
914 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
917 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
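/* In unified assembler syntax (UAL) the left-shift mnemonic is "lsl"; the
   older divided syntax spells the same operation "asl".  */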
918 #define streq(string1, string2) (strcmp (string1, string2) == 0)
920 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
921 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
922 | (1 << PIC_OFFSET_TABLE_REGNUM)))
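/* THUMB2_WORK_REGS is thus the set of low registers r0-r7, minus whichever
   of the hard frame pointer, stack pointer, program counter and PIC offset
   table register happen to fall in that range.  */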
924 /* Initialization code. */
926 struct processors
928 const char *const name;
929 enum processor_type core;
930 const char *arch;
931 enum base_architecture base_arch;
932 const unsigned long flags;
933 const struct tune_params *const tune;
937 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
938 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
939 prefetch_slots, \
940 l1_size, \
941 l1_line_size
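/* These macros expand to the three prefetch-related fields of a tune_params
   initializer: the number of prefetch slots, the L1 cache size and the L1
   cache line size.  ARM_PREFETCH_NOT_BENEFICIAL supplies 0/-1/-1 to mark
   prefetching as not worthwhile for the core.  */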
943 /* arm generic vectorizer costs. */
944 static const
945 struct cpu_vec_costs arm_default_vec_cost = {
946 1, /* scalar_stmt_cost. */
947 1, /* scalar_load_cost. */
948 1, /* scalar_store_cost. */
949 1, /* vec_stmt_cost. */
950 1, /* vec_to_scalar_cost. */
951 1, /* scalar_to_vec_cost. */
952 1, /* vec_align_load_cost. */
953 1, /* vec_unalign_load_cost. */
954 1, /* vec_unalign_store_cost. */
955 1, /* vec_store_cost. */
956 3, /* cond_taken_branch_cost. */
957 1, /* cond_not_taken_branch_cost. */
960 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
961 #include "aarch-cost-tables.h"
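/* That header is expected to provide the cost tables shared with AArch64 and
   referenced below (e.g. generic_extra_costs and cortexa53_extra_costs); the
   tables that follow here are ARM-specific.  */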
965 const struct cpu_cost_table cortexa9_extra_costs =
967 /* ALU */
969 0, /* Arith. */
970 0, /* Logical. */
971 0, /* Shift. */
972 COSTS_N_INSNS (1), /* Shift_reg. */
973 COSTS_N_INSNS (1), /* Arith_shift. */
974 COSTS_N_INSNS (2), /* Arith_shift_reg. */
975 0, /* Log_shift. */
976 COSTS_N_INSNS (1), /* Log_shift_reg. */
977 COSTS_N_INSNS (1), /* Extend. */
978 COSTS_N_INSNS (2), /* Extend_arith. */
979 COSTS_N_INSNS (1), /* Bfi. */
980 COSTS_N_INSNS (1), /* Bfx. */
981 0, /* Clz. */
982 0, /* non_exec. */
983 true /* non_exec_costs_exec. */
986 /* MULT SImode */
988 COSTS_N_INSNS (3), /* Simple. */
989 COSTS_N_INSNS (3), /* Flag_setting. */
990 COSTS_N_INSNS (2), /* Extend. */
991 COSTS_N_INSNS (3), /* Add. */
992 COSTS_N_INSNS (2), /* Extend_add. */
993 COSTS_N_INSNS (30) /* Idiv. No HW div on Cortex A9. */
995 /* MULT DImode */
997 0, /* Simple (N/A). */
998 0, /* Flag_setting (N/A). */
999 COSTS_N_INSNS (4), /* Extend. */
1000 0, /* Add (N/A). */
1001 COSTS_N_INSNS (4), /* Extend_add. */
1002 0 /* Idiv (N/A). */
1005 /* LD/ST */
1007 COSTS_N_INSNS (2), /* Load. */
1008 COSTS_N_INSNS (2), /* Load_sign_extend. */
1009 COSTS_N_INSNS (2), /* Ldrd. */
1010 COSTS_N_INSNS (2), /* Ldm_1st. */
1011 1, /* Ldm_regs_per_insn_1st. */
1012 2, /* Ldm_regs_per_insn_subsequent. */
1013 COSTS_N_INSNS (5), /* Loadf. */
1014 COSTS_N_INSNS (5), /* Loadd. */
1015 COSTS_N_INSNS (1), /* Load_unaligned. */
1016 COSTS_N_INSNS (2), /* Store. */
1017 COSTS_N_INSNS (2), /* Strd. */
1018 COSTS_N_INSNS (2), /* Stm_1st. */
1019 1, /* Stm_regs_per_insn_1st. */
1020 2, /* Stm_regs_per_insn_subsequent. */
1021 COSTS_N_INSNS (1), /* Storef. */
1022 COSTS_N_INSNS (1), /* Stored. */
1023 COSTS_N_INSNS (1) /* Store_unaligned. */
1026 /* FP SFmode */
1028 COSTS_N_INSNS (14), /* Div. */
1029 COSTS_N_INSNS (4), /* Mult. */
1030 COSTS_N_INSNS (7), /* Mult_addsub. */
1031 COSTS_N_INSNS (30), /* Fma. */
1032 COSTS_N_INSNS (3), /* Addsub. */
1033 COSTS_N_INSNS (1), /* Fpconst. */
1034 COSTS_N_INSNS (1), /* Neg. */
1035 COSTS_N_INSNS (3), /* Compare. */
1036 COSTS_N_INSNS (3), /* Widen. */
1037 COSTS_N_INSNS (3), /* Narrow. */
1038 COSTS_N_INSNS (3), /* Toint. */
1039 COSTS_N_INSNS (3), /* Fromint. */
1040 COSTS_N_INSNS (3) /* Roundint. */
1042 /* FP DFmode */
1044 COSTS_N_INSNS (24), /* Div. */
1045 COSTS_N_INSNS (5), /* Mult. */
1046 COSTS_N_INSNS (8), /* Mult_addsub. */
1047 COSTS_N_INSNS (30), /* Fma. */
1048 COSTS_N_INSNS (3), /* Addsub. */
1049 COSTS_N_INSNS (1), /* Fpconst. */
1050 COSTS_N_INSNS (1), /* Neg. */
1051 COSTS_N_INSNS (3), /* Compare. */
1052 COSTS_N_INSNS (3), /* Widen. */
1053 COSTS_N_INSNS (3), /* Narrow. */
1054 COSTS_N_INSNS (3), /* Toint. */
1055 COSTS_N_INSNS (3), /* Fromint. */
1056 COSTS_N_INSNS (3) /* Roundint. */
1059 /* Vector */
1061 COSTS_N_INSNS (1) /* Alu. */
1066 const struct cpu_cost_table cortexa7_extra_costs =
1068 /* ALU */
1070 0, /* Arith. */
1071 0, /* Logical. */
1072 COSTS_N_INSNS (1), /* Shift. */
1073 COSTS_N_INSNS (1), /* Shift_reg. */
1074 COSTS_N_INSNS (1), /* Arith_shift. */
1075 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1076 COSTS_N_INSNS (1), /* Log_shift. */
1077 COSTS_N_INSNS (1), /* Log_shift_reg. */
1078 COSTS_N_INSNS (1), /* Extend. */
1079 COSTS_N_INSNS (1), /* Extend_arith. */
1080 COSTS_N_INSNS (1), /* Bfi. */
1081 COSTS_N_INSNS (1), /* Bfx. */
1082 COSTS_N_INSNS (1), /* Clz. */
1083 0, /* non_exec. */
1084 true /* non_exec_costs_exec. */
1088 /* MULT SImode */
1090 0, /* Simple. */
1091 COSTS_N_INSNS (1), /* Flag_setting. */
1092 COSTS_N_INSNS (1), /* Extend. */
1093 COSTS_N_INSNS (1), /* Add. */
1094 COSTS_N_INSNS (1), /* Extend_add. */
1095 COSTS_N_INSNS (7) /* Idiv. */
1097 /* MULT DImode */
1099 0, /* Simple (N/A). */
1100 0, /* Flag_setting (N/A). */
1101 COSTS_N_INSNS (1), /* Extend. */
1102 0, /* Add. */
1103 COSTS_N_INSNS (2), /* Extend_add. */
1104 0 /* Idiv (N/A). */
1107 /* LD/ST */
1109 COSTS_N_INSNS (1), /* Load. */
1110 COSTS_N_INSNS (1), /* Load_sign_extend. */
1111 COSTS_N_INSNS (3), /* Ldrd. */
1112 COSTS_N_INSNS (1), /* Ldm_1st. */
1113 1, /* Ldm_regs_per_insn_1st. */
1114 2, /* Ldm_regs_per_insn_subsequent. */
1115 COSTS_N_INSNS (2), /* Loadf. */
1116 COSTS_N_INSNS (2), /* Loadd. */
1117 COSTS_N_INSNS (1), /* Load_unaligned. */
1118 COSTS_N_INSNS (1), /* Store. */
1119 COSTS_N_INSNS (3), /* Strd. */
1120 COSTS_N_INSNS (1), /* Stm_1st. */
1121 1, /* Stm_regs_per_insn_1st. */
1122 2, /* Stm_regs_per_insn_subsequent. */
1123 COSTS_N_INSNS (2), /* Storef. */
1124 COSTS_N_INSNS (2), /* Stored. */
1125 COSTS_N_INSNS (1) /* Store_unaligned. */
1128 /* FP SFmode */
1130 COSTS_N_INSNS (15), /* Div. */
1131 COSTS_N_INSNS (3), /* Mult. */
1132 COSTS_N_INSNS (7), /* Mult_addsub. */
1133 COSTS_N_INSNS (7), /* Fma. */
1134 COSTS_N_INSNS (3), /* Addsub. */
1135 COSTS_N_INSNS (3), /* Fpconst. */
1136 COSTS_N_INSNS (3), /* Neg. */
1137 COSTS_N_INSNS (3), /* Compare. */
1138 COSTS_N_INSNS (3), /* Widen. */
1139 COSTS_N_INSNS (3), /* Narrow. */
1140 COSTS_N_INSNS (3), /* Toint. */
1141 COSTS_N_INSNS (3), /* Fromint. */
1142 COSTS_N_INSNS (3) /* Roundint. */
1144 /* FP DFmode */
1146 COSTS_N_INSNS (30), /* Div. */
1147 COSTS_N_INSNS (6), /* Mult. */
1148 COSTS_N_INSNS (10), /* Mult_addsub. */
1149 COSTS_N_INSNS (7), /* Fma. */
1150 COSTS_N_INSNS (3), /* Addsub. */
1151 COSTS_N_INSNS (3), /* Fpconst. */
1152 COSTS_N_INSNS (3), /* Neg. */
1153 COSTS_N_INSNS (3), /* Compare. */
1154 COSTS_N_INSNS (3), /* Widen. */
1155 COSTS_N_INSNS (3), /* Narrow. */
1156 COSTS_N_INSNS (3), /* Toint. */
1157 COSTS_N_INSNS (3), /* Fromint. */
1158 COSTS_N_INSNS (3) /* Roundint. */
1161 /* Vector */
1163 COSTS_N_INSNS (1) /* Alu. */
1167 const struct cpu_cost_table cortexa12_extra_costs =
1169 /* ALU */
1171 0, /* Arith. */
1172 0, /* Logical. */
1173 0, /* Shift. */
1174 COSTS_N_INSNS (1), /* Shift_reg. */
1175 COSTS_N_INSNS (1), /* Arith_shift. */
1176 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1177 COSTS_N_INSNS (1), /* Log_shift. */
1178 COSTS_N_INSNS (1), /* Log_shift_reg. */
1179 0, /* Extend. */
1180 COSTS_N_INSNS (1), /* Extend_arith. */
1181 0, /* Bfi. */
1182 COSTS_N_INSNS (1), /* Bfx. */
1183 COSTS_N_INSNS (1), /* Clz. */
1184 0, /* non_exec. */
1185 true /* non_exec_costs_exec. */
1187 /* MULT SImode */
1190 COSTS_N_INSNS (2), /* Simple. */
1191 COSTS_N_INSNS (3), /* Flag_setting. */
1192 COSTS_N_INSNS (2), /* Extend. */
1193 COSTS_N_INSNS (3), /* Add. */
1194 COSTS_N_INSNS (2), /* Extend_add. */
1195 COSTS_N_INSNS (18) /* Idiv. */
1197 /* MULT DImode */
1199 0, /* Simple (N/A). */
1200 0, /* Flag_setting (N/A). */
1201 COSTS_N_INSNS (3), /* Extend. */
1202 0, /* Add (N/A). */
1203 COSTS_N_INSNS (3), /* Extend_add. */
1204 0 /* Idiv (N/A). */
1207 /* LD/ST */
1209 COSTS_N_INSNS (3), /* Load. */
1210 COSTS_N_INSNS (3), /* Load_sign_extend. */
1211 COSTS_N_INSNS (3), /* Ldrd. */
1212 COSTS_N_INSNS (3), /* Ldm_1st. */
1213 1, /* Ldm_regs_per_insn_1st. */
1214 2, /* Ldm_regs_per_insn_subsequent. */
1215 COSTS_N_INSNS (3), /* Loadf. */
1216 COSTS_N_INSNS (3), /* Loadd. */
1217 0, /* Load_unaligned. */
1218 0, /* Store. */
1219 0, /* Strd. */
1220 0, /* Stm_1st. */
1221 1, /* Stm_regs_per_insn_1st. */
1222 2, /* Stm_regs_per_insn_subsequent. */
1223 COSTS_N_INSNS (2), /* Storef. */
1224 COSTS_N_INSNS (2), /* Stored. */
1225 0 /* Store_unaligned. */
1228 /* FP SFmode */
1230 COSTS_N_INSNS (17), /* Div. */
1231 COSTS_N_INSNS (4), /* Mult. */
1232 COSTS_N_INSNS (8), /* Mult_addsub. */
1233 COSTS_N_INSNS (8), /* Fma. */
1234 COSTS_N_INSNS (4), /* Addsub. */
1235 COSTS_N_INSNS (2), /* Fpconst. */
1236 COSTS_N_INSNS (2), /* Neg. */
1237 COSTS_N_INSNS (2), /* Compare. */
1238 COSTS_N_INSNS (4), /* Widen. */
1239 COSTS_N_INSNS (4), /* Narrow. */
1240 COSTS_N_INSNS (4), /* Toint. */
1241 COSTS_N_INSNS (4), /* Fromint. */
1242 COSTS_N_INSNS (4) /* Roundint. */
1244 /* FP DFmode */
1246 COSTS_N_INSNS (31), /* Div. */
1247 COSTS_N_INSNS (4), /* Mult. */
1248 COSTS_N_INSNS (8), /* Mult_addsub. */
1249 COSTS_N_INSNS (8), /* Fma. */
1250 COSTS_N_INSNS (4), /* Addsub. */
1251 COSTS_N_INSNS (2), /* Fpconst. */
1252 COSTS_N_INSNS (2), /* Neg. */
1253 COSTS_N_INSNS (2), /* Compare. */
1254 COSTS_N_INSNS (4), /* Widen. */
1255 COSTS_N_INSNS (4), /* Narrow. */
1256 COSTS_N_INSNS (4), /* Toint. */
1257 COSTS_N_INSNS (4), /* Fromint. */
1258 COSTS_N_INSNS (4) /* Roundint. */
1261 /* Vector */
1263 COSTS_N_INSNS (1) /* Alu. */
1267 const struct cpu_cost_table cortexa15_extra_costs =
1269 /* ALU */
1271 0, /* Arith. */
1272 0, /* Logical. */
1273 0, /* Shift. */
1274 0, /* Shift_reg. */
1275 COSTS_N_INSNS (1), /* Arith_shift. */
1276 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1277 COSTS_N_INSNS (1), /* Log_shift. */
1278 COSTS_N_INSNS (1), /* Log_shift_reg. */
1279 0, /* Extend. */
1280 COSTS_N_INSNS (1), /* Extend_arith. */
1281 COSTS_N_INSNS (1), /* Bfi. */
1282 0, /* Bfx. */
1283 0, /* Clz. */
1284 0, /* non_exec. */
1285 true /* non_exec_costs_exec. */
1287 /* MULT SImode */
1290 COSTS_N_INSNS (2), /* Simple. */
1291 COSTS_N_INSNS (3), /* Flag_setting. */
1292 COSTS_N_INSNS (2), /* Extend. */
1293 COSTS_N_INSNS (2), /* Add. */
1294 COSTS_N_INSNS (2), /* Extend_add. */
1295 COSTS_N_INSNS (18) /* Idiv. */
1297 /* MULT DImode */
1299 0, /* Simple (N/A). */
1300 0, /* Flag_setting (N/A). */
1301 COSTS_N_INSNS (3), /* Extend. */
1302 0, /* Add (N/A). */
1303 COSTS_N_INSNS (3), /* Extend_add. */
1304 0 /* Idiv (N/A). */
1307 /* LD/ST */
1309 COSTS_N_INSNS (3), /* Load. */
1310 COSTS_N_INSNS (3), /* Load_sign_extend. */
1311 COSTS_N_INSNS (3), /* Ldrd. */
1312 COSTS_N_INSNS (4), /* Ldm_1st. */
1313 1, /* Ldm_regs_per_insn_1st. */
1314 2, /* Ldm_regs_per_insn_subsequent. */
1315 COSTS_N_INSNS (4), /* Loadf. */
1316 COSTS_N_INSNS (4), /* Loadd. */
1317 0, /* Load_unaligned. */
1318 0, /* Store. */
1319 0, /* Strd. */
1320 COSTS_N_INSNS (1), /* Stm_1st. */
1321 1, /* Stm_regs_per_insn_1st. */
1322 2, /* Stm_regs_per_insn_subsequent. */
1323 0, /* Storef. */
1324 0, /* Stored. */
1325 0 /* Store_unaligned. */
1328 /* FP SFmode */
1330 COSTS_N_INSNS (17), /* Div. */
1331 COSTS_N_INSNS (4), /* Mult. */
1332 COSTS_N_INSNS (8), /* Mult_addsub. */
1333 COSTS_N_INSNS (8), /* Fma. */
1334 COSTS_N_INSNS (4), /* Addsub. */
1335 COSTS_N_INSNS (2), /* Fpconst. */
1336 COSTS_N_INSNS (2), /* Neg. */
1337 COSTS_N_INSNS (5), /* Compare. */
1338 COSTS_N_INSNS (4), /* Widen. */
1339 COSTS_N_INSNS (4), /* Narrow. */
1340 COSTS_N_INSNS (4), /* Toint. */
1341 COSTS_N_INSNS (4), /* Fromint. */
1342 COSTS_N_INSNS (4) /* Roundint. */
1344 /* FP DFmode */
1346 COSTS_N_INSNS (31), /* Div. */
1347 COSTS_N_INSNS (4), /* Mult. */
1348 COSTS_N_INSNS (8), /* Mult_addsub. */
1349 COSTS_N_INSNS (8), /* Fma. */
1350 COSTS_N_INSNS (4), /* Addsub. */
1351 COSTS_N_INSNS (2), /* Fpconst. */
1352 COSTS_N_INSNS (2), /* Neg. */
1353 COSTS_N_INSNS (2), /* Compare. */
1354 COSTS_N_INSNS (4), /* Widen. */
1355 COSTS_N_INSNS (4), /* Narrow. */
1356 COSTS_N_INSNS (4), /* Toint. */
1357 COSTS_N_INSNS (4), /* Fromint. */
1358 COSTS_N_INSNS (4) /* Roundint. */
1361 /* Vector */
1363 COSTS_N_INSNS (1) /* Alu. */
1367 const struct cpu_cost_table v7m_extra_costs =
1369 /* ALU */
1371 0, /* Arith. */
1372 0, /* Logical. */
1373 0, /* Shift. */
1374 0, /* Shift_reg. */
1375 0, /* Arith_shift. */
1376 COSTS_N_INSNS (1), /* Arith_shift_reg. */
1377 0, /* Log_shift. */
1378 COSTS_N_INSNS (1), /* Log_shift_reg. */
1379 0, /* Extend. */
1380 COSTS_N_INSNS (1), /* Extend_arith. */
1381 0, /* Bfi. */
1382 0, /* Bfx. */
1383 0, /* Clz. */
1384 COSTS_N_INSNS (1), /* non_exec. */
1385 false /* non_exec_costs_exec. */
1388 /* MULT SImode */
1390 COSTS_N_INSNS (1), /* Simple. */
1391 COSTS_N_INSNS (1), /* Flag_setting. */
1392 COSTS_N_INSNS (2), /* Extend. */
1393 COSTS_N_INSNS (1), /* Add. */
1394 COSTS_N_INSNS (3), /* Extend_add. */
1395 COSTS_N_INSNS (8) /* Idiv. */
1397 /* MULT DImode */
1399 0, /* Simple (N/A). */
1400 0, /* Flag_setting (N/A). */
1401 COSTS_N_INSNS (2), /* Extend. */
1402 0, /* Add (N/A). */
1403 COSTS_N_INSNS (3), /* Extend_add. */
1404 0 /* Idiv (N/A). */
1407 /* LD/ST */
1409 COSTS_N_INSNS (2), /* Load. */
1410 0, /* Load_sign_extend. */
1411 COSTS_N_INSNS (3), /* Ldrd. */
1412 COSTS_N_INSNS (2), /* Ldm_1st. */
1413 1, /* Ldm_regs_per_insn_1st. */
1414 1, /* Ldm_regs_per_insn_subsequent. */
1415 COSTS_N_INSNS (2), /* Loadf. */
1416 COSTS_N_INSNS (3), /* Loadd. */
1417 COSTS_N_INSNS (1), /* Load_unaligned. */
1418 COSTS_N_INSNS (2), /* Store. */
1419 COSTS_N_INSNS (3), /* Strd. */
1420 COSTS_N_INSNS (2), /* Stm_1st. */
1421 1, /* Stm_regs_per_insn_1st. */
1422 1, /* Stm_regs_per_insn_subsequent. */
1423 COSTS_N_INSNS (2), /* Storef. */
1424 COSTS_N_INSNS (3), /* Stored. */
1425 COSTS_N_INSNS (1) /* Store_unaligned. */
1428 /* FP SFmode */
1430 COSTS_N_INSNS (7), /* Div. */
1431 COSTS_N_INSNS (2), /* Mult. */
1432 COSTS_N_INSNS (5), /* Mult_addsub. */
1433 COSTS_N_INSNS (3), /* Fma. */
1434 COSTS_N_INSNS (1), /* Addsub. */
1435 0, /* Fpconst. */
1436 0, /* Neg. */
1437 0, /* Compare. */
1438 0, /* Widen. */
1439 0, /* Narrow. */
1440 0, /* Toint. */
1441 0, /* Fromint. */
1442 0 /* Roundint. */
1444 /* FP DFmode */
1446 COSTS_N_INSNS (15), /* Div. */
1447 COSTS_N_INSNS (5), /* Mult. */
1448 COSTS_N_INSNS (7), /* Mult_addsub. */
1449 COSTS_N_INSNS (7), /* Fma. */
1450 COSTS_N_INSNS (3), /* Addsub. */
1451 0, /* Fpconst. */
1452 0, /* Neg. */
1453 0, /* Compare. */
1454 0, /* Widen. */
1455 0, /* Narrow. */
1456 0, /* Toint. */
1457 0, /* Fromint. */
1458 0 /* Roundint. */
1461 /* Vector */
1463 COSTS_N_INSNS (1) /* Alu. */
1467 const struct tune_params arm_slowmul_tune =
1469 arm_slowmul_rtx_costs,
1470 NULL,
1471 NULL, /* Sched adj cost. */
1472 3, /* Constant limit. */
1473 5, /* Max cond insns. */
1474 ARM_PREFETCH_NOT_BENEFICIAL,
1475 true, /* Prefer constant pool. */
1476 arm_default_branch_cost,
1477 false, /* Prefer LDRD/STRD. */
1478 {true, true}, /* Prefer non short circuit. */
1479 &arm_default_vec_cost, /* Vectorizer costs. */
1480 false /* Prefer Neon for 64-bit bitops. */
1483 const struct tune_params arm_fastmul_tune =
1485 arm_fastmul_rtx_costs,
1486 NULL,
1487 NULL, /* Sched adj cost. */
1488 1, /* Constant limit. */
1489 5, /* Max cond insns. */
1490 ARM_PREFETCH_NOT_BENEFICIAL,
1491 true, /* Prefer constant pool. */
1492 arm_default_branch_cost,
1493 false, /* Prefer LDRD/STRD. */
1494 {true, true}, /* Prefer non short circuit. */
1495 &arm_default_vec_cost, /* Vectorizer costs. */
1496 false /* Prefer Neon for 64-bit bitops. */
1499 /* StrongARM has early execution of branches, so a sequence that is worth
1500 skipping is shorter. Set max_insns_skipped to a lower value. */
1502 const struct tune_params arm_strongarm_tune =
1504 arm_fastmul_rtx_costs,
1505 NULL,
1506 NULL, /* Sched adj cost. */
1507 1, /* Constant limit. */
1508 3, /* Max cond insns. */
1509 ARM_PREFETCH_NOT_BENEFICIAL,
1510 true, /* Prefer constant pool. */
1511 arm_default_branch_cost,
1512 false, /* Prefer LDRD/STRD. */
1513 {true, true}, /* Prefer non short circuit. */
1514 &arm_default_vec_cost, /* Vectorizer costs. */
1515 false /* Prefer Neon for 64-bit bitops. */
1518 const struct tune_params arm_xscale_tune =
1520 arm_xscale_rtx_costs,
1521 NULL,
1522 xscale_sched_adjust_cost,
1523 2, /* Constant limit. */
1524 3, /* Max cond insns. */
1525 ARM_PREFETCH_NOT_BENEFICIAL,
1526 true, /* Prefer constant pool. */
1527 arm_default_branch_cost,
1528 false, /* Prefer LDRD/STRD. */
1529 {true, true}, /* Prefer non short circuit. */
1530 &arm_default_vec_cost, /* Vectorizer costs. */
1531 false /* Prefer Neon for 64-bit bitops. */
1534 const struct tune_params arm_9e_tune =
1536 arm_9e_rtx_costs,
1537 NULL,
1538 NULL, /* Sched adj cost. */
1539 1, /* Constant limit. */
1540 5, /* Max cond insns. */
1541 ARM_PREFETCH_NOT_BENEFICIAL,
1542 true, /* Prefer constant pool. */
1543 arm_default_branch_cost,
1544 false, /* Prefer LDRD/STRD. */
1545 {true, true}, /* Prefer non short circuit. */
1546 &arm_default_vec_cost, /* Vectorizer costs. */
1547 false /* Prefer Neon for 64-bit bitops. */
1550 const struct tune_params arm_v6t2_tune =
1552 arm_9e_rtx_costs,
1553 NULL,
1554 NULL, /* Sched adj cost. */
1555 1, /* Constant limit. */
1556 5, /* Max cond insns. */
1557 ARM_PREFETCH_NOT_BENEFICIAL,
1558 false, /* Prefer constant pool. */
1559 arm_default_branch_cost,
1560 false, /* Prefer LDRD/STRD. */
1561 {true, true}, /* Prefer non short circuit. */
1562 &arm_default_vec_cost, /* Vectorizer costs. */
1563 false /* Prefer Neon for 64-bit bitops. */
1566 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1567 const struct tune_params arm_cortex_tune =
1569 arm_9e_rtx_costs,
1570 &generic_extra_costs,
1571 NULL, /* Sched adj cost. */
1572 1, /* Constant limit. */
1573 5, /* Max cond insns. */
1574 ARM_PREFETCH_NOT_BENEFICIAL,
1575 false, /* Prefer constant pool. */
1576 arm_default_branch_cost,
1577 false, /* Prefer LDRD/STRD. */
1578 {true, true}, /* Prefer non short circuit. */
1579 &arm_default_vec_cost, /* Vectorizer costs. */
1580 false /* Prefer Neon for 64-bit bitops. */
1583 const struct tune_params arm_cortex_a7_tune =
1585 arm_9e_rtx_costs,
1586 &cortexa7_extra_costs,
1587 NULL,
1588 1, /* Constant limit. */
1589 5, /* Max cond insns. */
1590 ARM_PREFETCH_NOT_BENEFICIAL,
1591 false, /* Prefer constant pool. */
1592 arm_default_branch_cost,
1593 false, /* Prefer LDRD/STRD. */
1594 {true, true}, /* Prefer non short circuit. */
1595 &arm_default_vec_cost, /* Vectorizer costs. */
1596 false /* Prefer Neon for 64-bit bitops. */
1599 const struct tune_params arm_cortex_a15_tune =
1601 arm_9e_rtx_costs,
1602 &cortexa15_extra_costs,
1603 NULL, /* Sched adj cost. */
1604 1, /* Constant limit. */
1605 2, /* Max cond insns. */
1606 ARM_PREFETCH_NOT_BENEFICIAL,
1607 false, /* Prefer constant pool. */
1608 arm_default_branch_cost,
1609 true, /* Prefer LDRD/STRD. */
1610 {true, true}, /* Prefer non short circuit. */
1611 &arm_default_vec_cost, /* Vectorizer costs. */
1612 false /* Prefer Neon for 64-bit bitops. */
1615 const struct tune_params arm_cortex_a53_tune =
1617 arm_9e_rtx_costs,
1618 &cortexa53_extra_costs,
1619 NULL, /* Scheduler cost adjustment. */
1620 1, /* Constant limit. */
1621 5, /* Max cond insns. */
1622 ARM_PREFETCH_NOT_BENEFICIAL,
1623 false, /* Prefer constant pool. */
1624 arm_default_branch_cost,
1625 false, /* Prefer LDRD/STRD. */
1626 {true, true}, /* Prefer non short circuit. */
1627 &arm_default_vec_cost, /* Vectorizer costs. */
1628 false /* Prefer Neon for 64-bit bitops. */
1631 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1632 less appealing. Set max_insns_skipped to a low value. */
1634 const struct tune_params arm_cortex_a5_tune =
1636 arm_9e_rtx_costs,
1637 NULL,
1638 NULL, /* Sched adj cost. */
1639 1, /* Constant limit. */
1640 1, /* Max cond insns. */
1641 ARM_PREFETCH_NOT_BENEFICIAL,
1642 false, /* Prefer constant pool. */
1643 arm_cortex_a5_branch_cost,
1644 false, /* Prefer LDRD/STRD. */
1645 {false, false}, /* Prefer non short circuit. */
1646 &arm_default_vec_cost, /* Vectorizer costs. */
1647 false /* Prefer Neon for 64-bits bitops. */
1650 const struct tune_params arm_cortex_a9_tune =
1652 arm_9e_rtx_costs,
1653 &cortexa9_extra_costs,
1654 cortex_a9_sched_adjust_cost,
1655 1, /* Constant limit. */
1656 5, /* Max cond insns. */
1657 ARM_PREFETCH_BENEFICIAL(4,32,32),
1658 false, /* Prefer constant pool. */
1659 arm_default_branch_cost,
1660 false, /* Prefer LDRD/STRD. */
1661 {true, true}, /* Prefer non short circuit. */
1662 &arm_default_vec_cost, /* Vectorizer costs. */
1663 false /* Prefer Neon for 64-bits bitops. */
1666 const struct tune_params arm_cortex_a12_tune =
1668 arm_9e_rtx_costs,
1669 &cortexa12_extra_costs,
1670 NULL,
1671 1, /* Constant limit. */
1672 5, /* Max cond insns. */
1673 ARM_PREFETCH_BENEFICIAL(4,32,32),
1674 false, /* Prefer constant pool. */
1675 arm_default_branch_cost,
1676 true, /* Prefer LDRD/STRD. */
1677 {true, true}, /* Prefer non short circuit. */
1678 &arm_default_vec_cost, /* Vectorizer costs. */
1679 false /* Prefer Neon for 64-bits bitops. */
1682 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
1683 cycle to execute each. An LDR from the constant pool also takes two cycles
1684 to execute, but mildly increases pipelining opportunity (consecutive
1685 loads/stores can be pipelined together, saving one cycle), and may also
1686 improve icache utilisation. Hence we prefer the constant pool for such
1687 processors. */
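/* As an illustration of that trade-off (a sketch, not code generated by this
   file), loading a 32-bit constant either way costs two cycles on such a
   core, but the literal-pool form can pair with a neighbouring load:

       movw  r0, #0x5678        @ 1 cycle
       movt  r0, #0x1234        @ 1 cycle
   vs.
       ldr   r0, .LC0           @ 2 cycles, may pipeline with an adjacent load
       ...
     .LC0:
       .word 0x12345678                                                      */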
1689 const struct tune_params arm_v7m_tune =
1691 arm_9e_rtx_costs,
1692 &v7m_extra_costs,
1693 NULL, /* Sched adj cost. */
1694 1, /* Constant limit. */
1695 5, /* Max cond insns. */
1696 ARM_PREFETCH_NOT_BENEFICIAL,
1697 true, /* Prefer constant pool. */
1698 arm_cortex_m_branch_cost,
1699 false, /* Prefer LDRD/STRD. */
1700 {false, false}, /* Prefer non short circuit. */
1701 &arm_default_vec_cost, /* Vectorizer costs. */
1702 false /* Prefer Neon for 64-bits bitops. */
1705 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1706 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1707 const struct tune_params arm_v6m_tune =
1709 arm_9e_rtx_costs,
1710 NULL,
1711 NULL, /* Sched adj cost. */
1712 1, /* Constant limit. */
1713 5, /* Max cond insns. */
1714 ARM_PREFETCH_NOT_BENEFICIAL,
1715 false, /* Prefer constant pool. */
1716 arm_default_branch_cost,
1717 false, /* Prefer LDRD/STRD. */
1718 {false, false}, /* Prefer non short circuit. */
1719 &arm_default_vec_cost, /* Vectorizer costs. */
1720 false /* Prefer Neon for 64-bits bitops. */
1723 const struct tune_params arm_fa726te_tune =
1725 arm_9e_rtx_costs,
1726 NULL,
1727 fa726te_sched_adjust_cost,
1728 1, /* Constant limit. */
1729 5, /* Max cond insns. */
1730 ARM_PREFETCH_NOT_BENEFICIAL,
1731 true, /* Prefer constant pool. */
1732 arm_default_branch_cost,
1733 false, /* Prefer LDRD/STRD. */
1734 {true, true}, /* Prefer non short circuit. */
1735 &arm_default_vec_cost, /* Vectorizer costs. */
1736 false /* Prefer Neon for 64-bits bitops. */
1740 /* Not all of these give usefully different compilation alternatives,
1741 but there is no simple way of generalizing them. */
1742 static const struct processors all_cores[] =
1744 /* ARM Cores */
1745 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1746 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1747 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1748 #include "arm-cores.def"
1749 #undef ARM_CORE
1750 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
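/* For reference, an arm-cores.def entry of the (hypothetical) form
       ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, FL_LDSCHED, cortex_a15)
   expands through the macro above to
       {"cortex-a15", cortexa15, "7A", BASE_ARCH_7A,
        FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a15_tune},
   i.e. the COSTS field selects one of the tune_params structures defined
   earlier in this file.  */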
1753 static const struct processors all_architectures[] =
1755 /* ARM Architectures */
1756 /* We don't specify tuning costs here as it will be figured out
1757 from the core. */
1759 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1760 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1761 #include "arm-arches.def"
1762 #undef ARM_ARCH
1763 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1767 /* These are populated as command-line arguments are processed, or NULL
1768 if not specified. */
1769 static const struct processors *arm_selected_arch;
1770 static const struct processors *arm_selected_cpu;
1771 static const struct processors *arm_selected_tune;
1773 /* The name of the preprocessor macro to define for this architecture. */
1775 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1777 /* Available values for -mfpu=. */
1779 static const struct arm_fpu_desc all_fpus[] =
1781 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1782 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1783 #include "arm-fpus.def"
1784 #undef ARM_FPU
1788 /* Supported TLS relocations. */
1790 enum tls_reloc {
1791 TLS_GD32,
1792 TLS_LDM32,
1793 TLS_LDO32,
1794 TLS_IE32,
1795 TLS_LE32,
1796 TLS_DESCSEQ /* GNU scheme */
1799 /* The maximum number of insns to be used when loading a constant. */
1800 inline static int
1801 arm_constant_limit (bool size_p)
1803 return size_p ? 1 : current_tune->constant_limit;
1806 /* Emit an insn that's a simple single-set. Both the operands must be known
1807 to be valid. */
1808 inline static rtx
1809 emit_set_insn (rtx x, rtx y)
1811 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1814 /* Return the number of bits set in VALUE. */
1815 static unsigned
1816 bit_count (unsigned long value)
1818 unsigned long count = 0;
1820 while (value)
1822 count++;
1823 value &= value - 1; /* Clear the least-significant set bit. */
1826 return count;
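/* A worked example of the loop above: for value == 0x29 (binary 101001) the
   value &= value - 1 step clears one set bit per iteration,
   0x29 -> 0x28 -> 0x20 -> 0x0, so bit_count returns 3.  */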
1829 typedef struct
1831 enum machine_mode mode;
1832 const char *name;
1833 } arm_fixed_mode_set;
1835 /* A small helper for setting fixed-point libfuncs.  */
1837 static void
1838 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1839 const char *funcname, const char *modename,
1840 int num_suffix)
1842 char buffer[50];
1844 if (num_suffix == 0)
1845 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1846 else
1847 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1849 set_optab_libfunc (optable, mode, buffer);
1852 static void
1853 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1854 enum machine_mode from, const char *funcname,
1855 const char *toname, const char *fromname)
1857 char buffer[50];
1858 const char *maybe_suffix_2 = "";
1860 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1861 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1862 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1863 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1864 maybe_suffix_2 = "2";
1866 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1867 maybe_suffix_2);
1869 set_conv_libfunc (optable, to, from, buffer);
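/* Illustrative names built by the two helpers above (values assumed, taken
   from the __gnu_ naming scheme they implement):
       arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3)
         registers "__gnu_addsq3";
       arm_set_fixed_conv_libfunc (fract_optab, SQmode, DQmode,
                                   "fract", "sq", "dq")
         registers "__gnu_fractdqsq2" (both operands are fixed-point fract
         modes of the same signedness, hence the trailing "2").  */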
1872 /* Set up library functions unique to ARM. */
1874 static void
1875 arm_init_libfuncs (void)
1877 /* For Linux, we have access to kernel support for atomic operations. */
1878 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1879 init_sync_libfuncs (2 * UNITS_PER_WORD);
1881 /* There are no special library functions unless we are using the
1882 ARM BPABI. */
1883 if (!TARGET_BPABI)
1884 return;
1886 /* The functions below are described in Section 4 of the "Run-Time
1887 ABI for the ARM architecture", Version 1.0. */
1889 /* Double-precision floating-point arithmetic. Table 2. */
1890 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1891 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1892 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1893 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1894 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1896 /* Double-precision comparisons. Table 3. */
1897 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1898 set_optab_libfunc (ne_optab, DFmode, NULL);
1899 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1900 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1901 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1902 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1903 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1905 /* Single-precision floating-point arithmetic. Table 4. */
1906 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1907 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1908 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1909 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1910 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1912 /* Single-precision comparisons. Table 5. */
1913 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1914 set_optab_libfunc (ne_optab, SFmode, NULL);
1915 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1916 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1917 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1918 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1919 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1921 /* Floating-point to integer conversions. Table 6. */
1922 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1923 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1924 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1925 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1926 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1927 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1928 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1929 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1931 /* Conversions between floating types. Table 7. */
1932 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1933 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1935 /* Integer to floating-point conversions. Table 8. */
1936 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1937 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1938 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1939 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1940 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1941 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1942 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1943 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1945 /* Long long. Table 9. */
1946 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1947 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1948 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1949 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1950 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1951 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1952 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1953 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1955 /* Integer (32/32->32) division. \S 4.3.1. */
1956 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1957 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1959 /* The divmod functions are designed so that they can be used for
1960 plain division, even though they return both the quotient and the
1961 remainder. The quotient is returned in the usual location (i.e.,
1962 r0 for SImode, {r0, r1} for DImode), just as would be expected
1963 for an ordinary division routine. Because the AAPCS calling
1964 conventions specify that all of { r0, r1, r2, r3 } are
1965 caller-saved (call-clobbered) registers, there is no need to tell the compiler
1966 explicitly that those registers are clobbered by these
1967 routines. */
1968 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1969 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
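/* A sketch of what the registrations above mean for generated code (register
   assignments as documented in the AEABI run-time ABI): a plain 64-bit signed
   division such as

       long long q = a / b;

   becomes a call to __aeabi_ldivmod, whose quotient comes back in {r0, r1}
   and remainder in {r2, r3}, so the same routine also serves a % b.  */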
1971 /* For SImode division the ABI provides div-without-mod routines,
1972 which are faster. */
1973 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1974 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1976 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1977 divmod libcalls instead. */
1978 set_optab_libfunc (smod_optab, DImode, NULL);
1979 set_optab_libfunc (umod_optab, DImode, NULL);
1980 set_optab_libfunc (smod_optab, SImode, NULL);
1981 set_optab_libfunc (umod_optab, SImode, NULL);
1983 /* Half-precision float operations. The compiler handles all operations
1984 with NULL libfuncs by converting to SFmode.  */
1985 switch (arm_fp16_format)
1987 case ARM_FP16_FORMAT_IEEE:
1988 case ARM_FP16_FORMAT_ALTERNATIVE:
1990 /* Conversions. */
1991 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1992 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1993 ? "__gnu_f2h_ieee"
1994 : "__gnu_f2h_alternative"));
1995 set_conv_libfunc (sext_optab, SFmode, HFmode,
1996 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1997 ? "__gnu_h2f_ieee"
1998 : "__gnu_h2f_alternative"));
2000 /* Arithmetic. */
2001 set_optab_libfunc (add_optab, HFmode, NULL);
2002 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2003 set_optab_libfunc (smul_optab, HFmode, NULL);
2004 set_optab_libfunc (neg_optab, HFmode, NULL);
2005 set_optab_libfunc (sub_optab, HFmode, NULL);
2007 /* Comparisons. */
2008 set_optab_libfunc (eq_optab, HFmode, NULL);
2009 set_optab_libfunc (ne_optab, HFmode, NULL);
2010 set_optab_libfunc (lt_optab, HFmode, NULL);
2011 set_optab_libfunc (le_optab, HFmode, NULL);
2012 set_optab_libfunc (ge_optab, HFmode, NULL);
2013 set_optab_libfunc (gt_optab, HFmode, NULL);
2014 set_optab_libfunc (unord_optab, HFmode, NULL);
2015 break;
2017 default:
2018 break;
2021 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2023 const arm_fixed_mode_set fixed_arith_modes[] =
2025 { QQmode, "qq" },
2026 { UQQmode, "uqq" },
2027 { HQmode, "hq" },
2028 { UHQmode, "uhq" },
2029 { SQmode, "sq" },
2030 { USQmode, "usq" },
2031 { DQmode, "dq" },
2032 { UDQmode, "udq" },
2033 { TQmode, "tq" },
2034 { UTQmode, "utq" },
2035 { HAmode, "ha" },
2036 { UHAmode, "uha" },
2037 { SAmode, "sa" },
2038 { USAmode, "usa" },
2039 { DAmode, "da" },
2040 { UDAmode, "uda" },
2041 { TAmode, "ta" },
2042 { UTAmode, "uta" }
2044 const arm_fixed_mode_set fixed_conv_modes[] =
2046 { QQmode, "qq" },
2047 { UQQmode, "uqq" },
2048 { HQmode, "hq" },
2049 { UHQmode, "uhq" },
2050 { SQmode, "sq" },
2051 { USQmode, "usq" },
2052 { DQmode, "dq" },
2053 { UDQmode, "udq" },
2054 { TQmode, "tq" },
2055 { UTQmode, "utq" },
2056 { HAmode, "ha" },
2057 { UHAmode, "uha" },
2058 { SAmode, "sa" },
2059 { USAmode, "usa" },
2060 { DAmode, "da" },
2061 { UDAmode, "uda" },
2062 { TAmode, "ta" },
2063 { UTAmode, "uta" },
2064 { QImode, "qi" },
2065 { HImode, "hi" },
2066 { SImode, "si" },
2067 { DImode, "di" },
2068 { TImode, "ti" },
2069 { SFmode, "sf" },
2070 { DFmode, "df" }
2072 unsigned int i, j;
2074 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2076 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2077 "add", fixed_arith_modes[i].name, 3);
2078 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2079 "ssadd", fixed_arith_modes[i].name, 3);
2080 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2081 "usadd", fixed_arith_modes[i].name, 3);
2082 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2083 "sub", fixed_arith_modes[i].name, 3);
2084 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2085 "sssub", fixed_arith_modes[i].name, 3);
2086 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2087 "ussub", fixed_arith_modes[i].name, 3);
2088 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2089 "mul", fixed_arith_modes[i].name, 3);
2090 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2091 "ssmul", fixed_arith_modes[i].name, 3);
2092 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2093 "usmul", fixed_arith_modes[i].name, 3);
2094 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2095 "div", fixed_arith_modes[i].name, 3);
2096 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2097 "udiv", fixed_arith_modes[i].name, 3);
2098 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2099 "ssdiv", fixed_arith_modes[i].name, 3);
2100 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2101 "usdiv", fixed_arith_modes[i].name, 3);
2102 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2103 "neg", fixed_arith_modes[i].name, 2);
2104 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2105 "ssneg", fixed_arith_modes[i].name, 2);
2106 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2107 "usneg", fixed_arith_modes[i].name, 2);
2108 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2109 "ashl", fixed_arith_modes[i].name, 3);
2110 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2111 "ashr", fixed_arith_modes[i].name, 3);
2112 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2113 "lshr", fixed_arith_modes[i].name, 3);
2114 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2115 "ssashl", fixed_arith_modes[i].name, 3);
2116 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2117 "usashl", fixed_arith_modes[i].name, 3);
2118 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2119 "cmp", fixed_arith_modes[i].name, 2);
2122 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2123 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2125 if (i == j
2126 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2127 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2128 continue;
2130 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2131 fixed_conv_modes[j].mode, "fract",
2132 fixed_conv_modes[i].name,
2133 fixed_conv_modes[j].name);
2134 arm_set_fixed_conv_libfunc (satfract_optab,
2135 fixed_conv_modes[i].mode,
2136 fixed_conv_modes[j].mode, "satfract",
2137 fixed_conv_modes[i].name,
2138 fixed_conv_modes[j].name);
2139 arm_set_fixed_conv_libfunc (fractuns_optab,
2140 fixed_conv_modes[i].mode,
2141 fixed_conv_modes[j].mode, "fractuns",
2142 fixed_conv_modes[i].name,
2143 fixed_conv_modes[j].name);
2144 arm_set_fixed_conv_libfunc (satfractuns_optab,
2145 fixed_conv_modes[i].mode,
2146 fixed_conv_modes[j].mode, "satfractuns",
2147 fixed_conv_modes[i].name,
2148 fixed_conv_modes[j].name);
2152 if (TARGET_AAPCS_BASED)
2153 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2156 /* On AAPCS systems, this is the "struct __va_list". */
2157 static GTY(()) tree va_list_type;
2159 /* Return the type to use as __builtin_va_list. */
2160 static tree
2161 arm_build_builtin_va_list (void)
2163 tree va_list_name;
2164 tree ap_field;
2166 if (!TARGET_AAPCS_BASED)
2167 return std_build_builtin_va_list ();
2169 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2170 defined as:
2172 struct __va_list
2174 void *__ap;
2177 The C Library ABI further reinforces this definition in \S
2178 4.1.
2180 We must follow this definition exactly. The structure tag
2181 name is visible in C++ mangled names, and thus forms a part
2182 of the ABI. The field name may be used by people who
2183 #include <stdarg.h>. */
2184 /* Create the type. */
2185 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2186 /* Give it the required name. */
2187 va_list_name = build_decl (BUILTINS_LOCATION,
2188 TYPE_DECL,
2189 get_identifier ("__va_list"),
2190 va_list_type);
2191 DECL_ARTIFICIAL (va_list_name) = 1;
2192 TYPE_NAME (va_list_type) = va_list_name;
2193 TYPE_STUB_DECL (va_list_type) = va_list_name;
2194 /* Create the __ap field. */
2195 ap_field = build_decl (BUILTINS_LOCATION,
2196 FIELD_DECL,
2197 get_identifier ("__ap"),
2198 ptr_type_node);
2199 DECL_ARTIFICIAL (ap_field) = 1;
2200 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2201 TYPE_FIELDS (va_list_type) = ap_field;
2202 /* Compute its layout. */
2203 layout_type (va_list_type);
2205 return va_list_type;
2208 /* Return an expression of type "void *" pointing to the next
2209 available argument in a variable-argument list. VALIST is the
2210 user-level va_list object, of type __builtin_va_list. */
2211 static tree
2212 arm_extract_valist_ptr (tree valist)
2214 if (TREE_TYPE (valist) == error_mark_node)
2215 return error_mark_node;
2217 /* On an AAPCS target, the pointer is stored within "struct
2218 va_list". */
2219 if (TARGET_AAPCS_BASED)
2221 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2222 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2223 valist, ap_field, NULL_TREE);
2226 return valist;
2229 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2230 static void
2231 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2233 valist = arm_extract_valist_ptr (valist);
2234 std_expand_builtin_va_start (valist, nextarg);
2237 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2238 static tree
2239 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2240 gimple_seq *post_p)
2242 valist = arm_extract_valist_ptr (valist);
2243 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2246 /* Fix up any incompatible options that the user has specified. */
2247 static void
2248 arm_option_override (void)
2250 if (global_options_set.x_arm_arch_option)
2251 arm_selected_arch = &all_architectures[arm_arch_option];
2253 if (global_options_set.x_arm_cpu_option)
2255 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2256 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2259 if (global_options_set.x_arm_tune_option)
2260 arm_selected_tune = &all_cores[(int) arm_tune_option];
2262 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2263 SUBTARGET_OVERRIDE_OPTIONS;
2264 #endif
2266 if (arm_selected_arch)
2268 if (arm_selected_cpu)
2270 /* Check for conflict between mcpu and march. */
2271 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2273 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2274 arm_selected_cpu->name, arm_selected_arch->name);
2275 /* -march wins for code generation.
2276 -mcpu wins for default tuning. */
2277 if (!arm_selected_tune)
2278 arm_selected_tune = arm_selected_cpu;
2280 arm_selected_cpu = arm_selected_arch;
2282 else
2283 /* -mcpu wins. */
2284 arm_selected_arch = NULL;
2286 else
2287 /* Pick a CPU based on the architecture. */
2288 arm_selected_cpu = arm_selected_arch;
2291 /* If the user did not specify a processor, choose one for them. */
2292 if (!arm_selected_cpu)
2294 const struct processors * sel;
2295 unsigned int sought;
2297 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2298 if (!arm_selected_cpu->name)
2300 #ifdef SUBTARGET_CPU_DEFAULT
2301 /* Use the subtarget default CPU if none was specified by
2302 configure. */
2303 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2304 #endif
2305 /* Default to ARM6. */
2306 if (!arm_selected_cpu->name)
2307 arm_selected_cpu = &all_cores[arm6];
2310 sel = arm_selected_cpu;
2311 insn_flags = sel->flags;
2313 /* Now check to see if the user has specified some command line
2314 switches that require certain abilities from the CPU.  */
2315 sought = 0;
2317 if (TARGET_INTERWORK || TARGET_THUMB)
2319 sought |= (FL_THUMB | FL_MODE32);
2321 /* There are no ARM processors that support both APCS-26 and
2322 interworking. Therefore we force FL_MODE26 to be removed
2323 from insn_flags here (if it was set), so that the search
2324 below will always be able to find a compatible processor. */
2325 insn_flags &= ~FL_MODE26;
2328 if (sought != 0 && ((sought & insn_flags) != sought))
2330 /* Try to locate a CPU type that supports all of the abilities
2331 of the default CPU, plus the extra abilities requested by
2332 the user. */
2333 for (sel = all_cores; sel->name != NULL; sel++)
2334 if ((sel->flags & sought) == (sought | insn_flags))
2335 break;
2337 if (sel->name == NULL)
2339 unsigned current_bit_count = 0;
2340 const struct processors * best_fit = NULL;
2342 /* Ideally we would like to issue an error message here
2343 saying that it was not possible to find a CPU compatible
2344 with the default CPU, but which also supports the command
2345 line options specified by the programmer, and so they
2346 ought to use the -mcpu=<name> command line option to
2347 override the default CPU type.
2349 If we cannot find a cpu that has both the
2350 characteristics of the default cpu and the given
2351 command line options we scan the array again looking
2352 for a best match. */
2353 for (sel = all_cores; sel->name != NULL; sel++)
2354 if ((sel->flags & sought) == sought)
2356 unsigned count;
2358 count = bit_count (sel->flags & insn_flags);
2360 if (count >= current_bit_count)
2362 best_fit = sel;
2363 current_bit_count = count;
2367 gcc_assert (best_fit);
2368 sel = best_fit;
2371 arm_selected_cpu = sel;
2375 gcc_assert (arm_selected_cpu);
2376 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2377 if (!arm_selected_tune)
2378 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2380 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2381 insn_flags = arm_selected_cpu->flags;
2382 arm_base_arch = arm_selected_cpu->base_arch;
2384 arm_tune = arm_selected_tune->core;
2385 tune_flags = arm_selected_tune->flags;
2386 current_tune = arm_selected_tune->tune;
2388 /* Make sure that the processor choice does not conflict with any of the
2389 other command line choices. */
2390 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2391 error ("target CPU does not support ARM mode");
2393 /* BPABI targets use linker tricks to allow interworking on cores
2394 without thumb support. */
2395 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2397 warning (0, "target CPU does not support interworking" );
2398 target_flags &= ~MASK_INTERWORK;
2401 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2403 warning (0, "target CPU does not support THUMB instructions");
2404 target_flags &= ~MASK_THUMB;
2407 if (TARGET_APCS_FRAME && TARGET_THUMB)
2409 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2410 target_flags &= ~MASK_APCS_FRAME;
2413 /* Callee super interworking implies thumb interworking. Adding
2414 this to the flags here simplifies the logic elsewhere. */
2415 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2416 target_flags |= MASK_INTERWORK;
2418 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2419 from here where no function is being compiled currently. */
2420 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2421 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2423 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2424 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2426 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2428 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2429 target_flags |= MASK_APCS_FRAME;
2432 if (TARGET_POKE_FUNCTION_NAME)
2433 target_flags |= MASK_APCS_FRAME;
2435 if (TARGET_APCS_REENT && flag_pic)
2436 error ("-fpic and -mapcs-reent are incompatible");
2438 if (TARGET_APCS_REENT)
2439 warning (0, "APCS reentrant code not supported. Ignored");
2441 /* If this target is normally configured to use APCS frames, warn if they
2442 are turned off and debugging is turned on. */
2443 if (TARGET_ARM
2444 && write_symbols != NO_DEBUG
2445 && !TARGET_APCS_FRAME
2446 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2447 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2449 if (TARGET_APCS_FLOAT)
2450 warning (0, "passing floating point arguments in fp regs not yet supported");
2452 if (TARGET_LITTLE_WORDS)
2453 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2454 "will be removed in a future release");
2456 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2457 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2458 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2459 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2460 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2461 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2462 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2463 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2464 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2465 arm_arch6m = arm_arch6 && !arm_arch_notm;
2466 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2467 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2468 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2469 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2470 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2472 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2473 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2474 thumb_code = TARGET_ARM == 0;
2475 thumb1_code = TARGET_THUMB1 != 0;
2476 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2477 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2478 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2479 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2480 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2481 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2482 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2483 if (arm_restrict_it == 2)
2484 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2486 if (!TARGET_THUMB2)
2487 arm_restrict_it = 0;
2489 /* If we are not using the default (ARM mode) section anchor offset
2490 ranges, then set the correct ranges now. */
2491 if (TARGET_THUMB1)
2493 /* Thumb-1 LDR instructions cannot have negative offsets.
2494 Permissible positive offset ranges are 5-bit (for byte loads),
2495 6-bit (for halfword loads), or 7-bit (for word loads).
2496 Empirical results suggest a 7-bit anchor range gives the best
2497 overall code size. */
2498 targetm.min_anchor_offset = 0;
2499 targetm.max_anchor_offset = 127;
2501 else if (TARGET_THUMB2)
2503 /* The minimum is set such that the total size of the block
2504 for a particular anchor is 248 + 1 + 4095 bytes, which is
2505 divisible by eight, ensuring natural spacing of anchors. */
2506 targetm.min_anchor_offset = -248;
2507 targetm.max_anchor_offset = 4095;
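/* Checking the arithmetic in the comment above: 248 + 1 + 4095 = 4344
   = 8 * 543, so the block covered by one anchor is a multiple of eight
   bytes as claimed.  */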
2510 /* V5 code we generate is completely interworking capable, so we turn off
2511 TARGET_INTERWORK here to avoid many tests later on. */
2513 /* XXX However, we must pass the right pre-processor defines to CPP
2514 or GLD can get confused. This is a hack. */
2515 if (TARGET_INTERWORK)
2516 arm_cpp_interwork = 1;
2518 if (arm_arch5)
2519 target_flags &= ~MASK_INTERWORK;
2521 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2522 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2524 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2525 error ("iwmmxt abi requires an iwmmxt capable cpu");
2527 if (!global_options_set.x_arm_fpu_index)
2529 const char *target_fpu_name;
2530 bool ok;
2532 #ifdef FPUTYPE_DEFAULT
2533 target_fpu_name = FPUTYPE_DEFAULT;
2534 #else
2535 target_fpu_name = "vfp";
2536 #endif
2538 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2539 CL_TARGET);
2540 gcc_assert (ok);
2543 arm_fpu_desc = &all_fpus[arm_fpu_index];
2545 switch (arm_fpu_desc->model)
2547 case ARM_FP_MODEL_VFP:
2548 arm_fpu_attr = FPU_VFP;
2549 break;
2551 default:
2552 gcc_unreachable();
2555 if (TARGET_AAPCS_BASED)
2557 if (TARGET_CALLER_INTERWORKING)
2558 error ("AAPCS does not support -mcaller-super-interworking");
2559 else
2560 if (TARGET_CALLEE_INTERWORKING)
2561 error ("AAPCS does not support -mcallee-super-interworking");
2564 /* iWMMXt and NEON are incompatible. */
2565 if (TARGET_IWMMXT && TARGET_NEON)
2566 error ("iWMMXt and NEON are incompatible");
2568 /* iWMMXt unsupported under Thumb mode. */
2569 if (TARGET_THUMB && TARGET_IWMMXT)
2570 error ("iWMMXt unsupported under Thumb mode");
2572 /* __fp16 support currently assumes the core has ldrh. */
2573 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2574 sorry ("__fp16 and no ldrh");
2576 /* If soft-float is specified then don't use FPU. */
2577 if (TARGET_SOFT_FLOAT)
2578 arm_fpu_attr = FPU_NONE;
2580 if (TARGET_AAPCS_BASED)
2582 if (arm_abi == ARM_ABI_IWMMXT)
2583 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2584 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2585 && TARGET_HARD_FLOAT
2586 && TARGET_VFP)
2587 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2588 else
2589 arm_pcs_default = ARM_PCS_AAPCS;
2591 else
2593 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2594 sorry ("-mfloat-abi=hard and VFP");
2596 if (arm_abi == ARM_ABI_APCS)
2597 arm_pcs_default = ARM_PCS_APCS;
2598 else
2599 arm_pcs_default = ARM_PCS_ATPCS;
2602 /* For arm2/3 there is no need to do any scheduling if we are doing
2603 software floating-point. */
2604 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2605 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2607 /* Use the cp15 method if it is available. */
2608 if (target_thread_pointer == TP_AUTO)
2610 if (arm_arch6k && !TARGET_THUMB1)
2611 target_thread_pointer = TP_CP15;
2612 else
2613 target_thread_pointer = TP_SOFT;
2616 if (TARGET_HARD_TP && TARGET_THUMB1)
2617 error ("can not use -mtp=cp15 with 16-bit Thumb");
2619 /* Override the default structure alignment for AAPCS ABI. */
2620 if (!global_options_set.x_arm_structure_size_boundary)
2622 if (TARGET_AAPCS_BASED)
2623 arm_structure_size_boundary = 8;
2625 else
2627 if (arm_structure_size_boundary != 8
2628 && arm_structure_size_boundary != 32
2629 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2631 if (ARM_DOUBLEWORD_ALIGN)
2632 warning (0,
2633 "structure size boundary can only be set to 8, 32 or 64");
2634 else
2635 warning (0, "structure size boundary can only be set to 8 or 32");
2636 arm_structure_size_boundary
2637 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2641 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2643 error ("RTP PIC is incompatible with Thumb");
2644 flag_pic = 0;
2647 /* If stack checking is disabled, we can use r10 as the PIC register,
2648 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2649 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2651 if (TARGET_VXWORKS_RTP)
2652 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2653 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2656 if (flag_pic && TARGET_VXWORKS_RTP)
2657 arm_pic_register = 9;
2659 if (arm_pic_register_string != NULL)
2661 int pic_register = decode_reg_name (arm_pic_register_string);
2663 if (!flag_pic)
2664 warning (0, "-mpic-register= is useless without -fpic");
2666 /* Prevent the user from choosing an obviously stupid PIC register. */
2667 else if (pic_register < 0 || call_used_regs[pic_register]
2668 || pic_register == HARD_FRAME_POINTER_REGNUM
2669 || pic_register == STACK_POINTER_REGNUM
2670 || pic_register >= PC_REGNUM
2671 || (TARGET_VXWORKS_RTP
2672 && (unsigned int) pic_register != arm_pic_register))
2673 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2674 else
2675 arm_pic_register = pic_register;
2678 if (TARGET_VXWORKS_RTP
2679 && !global_options_set.x_arm_pic_data_is_text_relative)
2680 arm_pic_data_is_text_relative = 0;
2682 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2683 if (fix_cm3_ldrd == 2)
2685 if (arm_selected_cpu->core == cortexm3)
2686 fix_cm3_ldrd = 1;
2687 else
2688 fix_cm3_ldrd = 0;
2691 /* Enable -munaligned-access by default for
2692 - all ARMv6 architecture-based processors
2693 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2694 - ARMv8 architecture-based processors.
2696 Disable -munaligned-access by default for
2697 - all pre-ARMv6 architecture-based processors
2698 - ARMv6-M architecture-based processors. */
2700 if (unaligned_access == 2)
2702 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2703 unaligned_access = 1;
2704 else
2705 unaligned_access = 0;
2707 else if (unaligned_access == 1
2708 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2710 warning (0, "target CPU does not support unaligned accesses");
2711 unaligned_access = 0;
2714 if (TARGET_THUMB1 && flag_schedule_insns)
2716 /* Don't warn since it's on by default in -O2. */
2717 flag_schedule_insns = 0;
2720 if (optimize_size)
2722 /* If optimizing for size, bump the number of instructions that we
2723 are prepared to conditionally execute (even on a StrongARM). */
2724 max_insns_skipped = 6;
2726 else
2727 max_insns_skipped = current_tune->max_insns_skipped;
2729 /* Hot/Cold partitioning is not currently supported, since we can't
2730 handle literal pool placement in that case. */
2731 if (flag_reorder_blocks_and_partition)
2733 inform (input_location,
2734 "-freorder-blocks-and-partition not supported on this architecture");
2735 flag_reorder_blocks_and_partition = 0;
2736 flag_reorder_blocks = 1;
2739 if (flag_pic)
2740 /* Hoisting PIC address calculations more aggressively provides a small,
2741 but measurable, size reduction for PIC code. Therefore, we decrease
2742 the bar for unrestricted expression hoisting to the cost of PIC address
2743 calculation, which is 2 instructions. */
2744 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2745 global_options.x_param_values,
2746 global_options_set.x_param_values);
2748 /* ARM EABI defaults to strict volatile bitfields. */
2749 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2750 && abi_version_at_least(2))
2751 flag_strict_volatile_bitfields = 1;
2753 /* Enable software prefetching at -O3 for CPUs that have prefetch, and where we
2754 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more).  */
2755 if (flag_prefetch_loop_arrays < 0
2756 && HAVE_prefetch
2757 && optimize >= 3
2758 && current_tune->num_prefetch_slots > 0)
2759 flag_prefetch_loop_arrays = 1;
2761 /* Set up parameters to be used in prefetching algorithm. Do not override the
2762 defaults unless we are tuning for a core we have researched values for. */
2763 if (current_tune->num_prefetch_slots > 0)
2764 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2765 current_tune->num_prefetch_slots,
2766 global_options.x_param_values,
2767 global_options_set.x_param_values);
2768 if (current_tune->l1_cache_line_size >= 0)
2769 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2770 current_tune->l1_cache_line_size,
2771 global_options.x_param_values,
2772 global_options_set.x_param_values);
2773 if (current_tune->l1_cache_size >= 0)
2774 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2775 current_tune->l1_cache_size,
2776 global_options.x_param_values,
2777 global_options_set.x_param_values);
2779 /* Use Neon to perform 64-bit operations rather than core
2780 registers. */
2781 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2782 if (use_neon_for_64bits == 1)
2783 prefer_neon_for_64bits = true;
2785 /* Use the alternative scheduling-pressure algorithm by default. */
2786 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2787 global_options.x_param_values,
2788 global_options_set.x_param_values);
2790 /* Disable shrink-wrap when optimizing function for size, since it tends to
2791 generate additional returns. */
2792 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2793 flag_shrink_wrap = false;
2794 /* TBD: Dwarf info for apcs frame is not handled yet. */
2795 if (TARGET_APCS_FRAME)
2796 flag_shrink_wrap = false;
2798 /* We only support -mslow-flash-data on armv7-m targets. */
2799 if (target_slow_flash_data
2800 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2801 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
2802 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2804 /* Currently, for slow flash data, we just disable literal pools. */
2805 if (target_slow_flash_data)
2806 arm_disable_literal_pool = true;
2808 /* Register global variables with the garbage collector. */
2809 arm_add_gc_roots ();
2812 static void
2813 arm_add_gc_roots (void)
2815 gcc_obstack_init(&minipool_obstack);
2816 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2819 /* A table of known ARM exception types.
2820 For use with the interrupt function attribute. */
2822 typedef struct
2824 const char *const arg;
2825 const unsigned long return_value;
2827 isr_attribute_arg;
2829 static const isr_attribute_arg isr_attribute_args [] =
2831 { "IRQ", ARM_FT_ISR },
2832 { "irq", ARM_FT_ISR },
2833 { "FIQ", ARM_FT_FIQ },
2834 { "fiq", ARM_FT_FIQ },
2835 { "ABORT", ARM_FT_ISR },
2836 { "abort", ARM_FT_ISR },
2837 { "ABORT", ARM_FT_ISR },
2838 { "abort", ARM_FT_ISR },
2839 { "UNDEF", ARM_FT_EXCEPTION },
2840 { "undef", ARM_FT_EXCEPTION },
2841 { "SWI", ARM_FT_EXCEPTION },
2842 { "swi", ARM_FT_EXCEPTION },
2843 { NULL, ARM_FT_NORMAL }
2846 /* Returns the (interrupt) function type of the current
2847 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2849 static unsigned long
2850 arm_isr_value (tree argument)
2852 const isr_attribute_arg * ptr;
2853 const char * arg;
2855 if (!arm_arch_notm)
2856 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2858 /* No argument - default to IRQ. */
2859 if (argument == NULL_TREE)
2860 return ARM_FT_ISR;
2862 /* Get the value of the argument. */
2863 if (TREE_VALUE (argument) == NULL_TREE
2864 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2865 return ARM_FT_UNKNOWN;
2867 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2869 /* Check it against the list of known arguments. */
2870 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2871 if (streq (arg, ptr->arg))
2872 return ptr->return_value;
2874 /* An unrecognized interrupt type. */
2875 return ARM_FT_UNKNOWN;
2878 /* Computes the type of the current function. */
2880 static unsigned long
2881 arm_compute_func_type (void)
2883 unsigned long type = ARM_FT_UNKNOWN;
2884 tree a;
2885 tree attr;
2887 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2889 /* Decide if the current function is volatile. Such functions
2890 never return, and many memory cycles can be saved by not storing
2891 register values that will never be needed again. This optimization
2892 was added to speed up context switching in a kernel application. */
2893 if (optimize > 0
2894 && (TREE_NOTHROW (current_function_decl)
2895 || !(flag_unwind_tables
2896 || (flag_exceptions
2897 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2898 && TREE_THIS_VOLATILE (current_function_decl))
2899 type |= ARM_FT_VOLATILE;
2901 if (cfun->static_chain_decl != NULL)
2902 type |= ARM_FT_NESTED;
2904 attr = DECL_ATTRIBUTES (current_function_decl);
2906 a = lookup_attribute ("naked", attr);
2907 if (a != NULL_TREE)
2908 type |= ARM_FT_NAKED;
2910 a = lookup_attribute ("isr", attr);
2911 if (a == NULL_TREE)
2912 a = lookup_attribute ("interrupt", attr);
2914 if (a == NULL_TREE)
2915 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2916 else
2917 type |= arm_isr_value (TREE_VALUE (a));
2919 return type;
2922 /* Returns the type of the current function. */
2924 unsigned long
2925 arm_current_func_type (void)
2927 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2928 cfun->machine->func_type = arm_compute_func_type ();
2930 return cfun->machine->func_type;
2933 bool
2934 arm_allocate_stack_slots_for_args (void)
2936 /* Naked functions should not allocate stack slots for arguments. */
2937 return !IS_NAKED (arm_current_func_type ());
2940 static bool
2941 arm_warn_func_return (tree decl)
2943 /* Naked functions are implemented entirely in assembly, including the
2944 return sequence, so suppress warnings about this. */
2945 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2949 /* Output assembler code for a block containing the constant parts
2950 of a trampoline, leaving space for the variable parts.
2952 On the ARM, (if r8 is the static chain regnum, and remembering that
2953 referencing pc adds an offset of 8) the trampoline looks like:
2954 ldr r8, [pc, #0]
2955 ldr pc, [pc]
2956 .word static chain value
2957 .word function's address
2958 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
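/* Working through the offsets above as a check: when the first ldr executes,
   pc reads as that instruction's address plus 8, i.e. the address of the
   first .word, so "ldr r8, [pc, #0]" fetches the static chain value; by the
   second ldr, pc points at the second .word, so "ldr pc, [pc]" loads the
   function's address straight into pc and branches there.  */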
2960 static void
2961 arm_asm_trampoline_template (FILE *f)
2963 if (TARGET_ARM)
2965 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2966 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2968 else if (TARGET_THUMB2)
2970 /* The Thumb-2 trampoline is similar to the arm implementation.
2971 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2972 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2973 STATIC_CHAIN_REGNUM, PC_REGNUM);
2974 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2976 else
2978 ASM_OUTPUT_ALIGN (f, 2);
2979 fprintf (f, "\t.code\t16\n");
2980 fprintf (f, ".Ltrampoline_start:\n");
2981 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2982 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2983 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2984 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2985 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2986 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2988 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2989 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2992 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2994 static void
2995 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2997 rtx fnaddr, mem, a_tramp;
2999 emit_block_move (m_tramp, assemble_trampoline_template (),
3000 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3002 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3003 emit_move_insn (mem, chain_value);
3005 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3006 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3007 emit_move_insn (mem, fnaddr);
3009 a_tramp = XEXP (m_tramp, 0);
3010 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3011 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3012 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3015 /* Thumb trampolines should be entered in thumb mode, so set
3016 the bottom bit of the address. */
3018 static rtx
3019 arm_trampoline_adjust_address (rtx addr)
3021 if (TARGET_THUMB)
3022 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3023 NULL, 0, OPTAB_LIB_WIDEN);
3024 return addr;
3027 /* Return 1 if it is possible to return using a single instruction.
3028 If SIBLING is non-null, this is a test for a return before a sibling
3029 call. SIBLING is the call insn, so we can examine its register usage. */
3032 use_return_insn (int iscond, rtx sibling)
3034 int regno;
3035 unsigned int func_type;
3036 unsigned long saved_int_regs;
3037 unsigned HOST_WIDE_INT stack_adjust;
3038 arm_stack_offsets *offsets;
3040 /* Never use a return instruction before reload has run. */
3041 if (!reload_completed)
3042 return 0;
3044 func_type = arm_current_func_type ();
3046 /* Naked, volatile and stack alignment functions need special
3047 consideration. */
3048 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3049 return 0;
3051 /* So do interrupt functions that use the frame pointer and Thumb
3052 interrupt functions. */
3053 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3054 return 0;
3056 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3057 && !optimize_function_for_size_p (cfun))
3058 return 0;
3060 offsets = arm_get_frame_offsets ();
3061 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3063 /* As do variadic functions. */
3064 if (crtl->args.pretend_args_size
3065 || cfun->machine->uses_anonymous_args
3066 /* Or if the function calls __builtin_eh_return () */
3067 || crtl->calls_eh_return
3068 /* Or if the function calls alloca */
3069 || cfun->calls_alloca
3070 /* Or if there is a stack adjustment. However, if the stack pointer
3071 is saved on the stack, we can use a pre-incrementing stack load. */
3072 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3073 && stack_adjust == 4)))
3074 return 0;
3076 saved_int_regs = offsets->saved_regs_mask;
3078 /* Unfortunately, the insn
3080 ldmib sp, {..., sp, ...}
3082 triggers a bug on most SA-110 based devices, such that the stack
3083 pointer won't be correctly restored if the instruction takes a
3084 page fault. We work around this problem by popping r3 along with
3085 the other registers, since that is never slower than executing
3086 another instruction.
3088 We test for !arm_arch5 here, because code for any architecture
3089 less than this could potentially be run on one of the buggy
3090 chips. */
3091 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3093 /* Validate that r3 is a call-clobbered register (always true in
3094 the default abi) ... */
3095 if (!call_used_regs[3])
3096 return 0;
3098 /* ... that it isn't being used for a return value ... */
3099 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3100 return 0;
3102 /* ... or for a tail-call argument ... */
3103 if (sibling)
3105 gcc_assert (CALL_P (sibling));
3107 if (find_regno_fusage (sibling, USE, 3))
3108 return 0;
3111 /* ... and that there are no call-saved registers in r0-r2
3112 (always true in the default ABI). */
3113 if (saved_int_regs & 0x7)
3114 return 0;
3117 /* Can't be done if interworking with Thumb, and any registers have been
3118 stacked. */
3119 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3120 return 0;
3122 /* On StrongARM, conditional returns are expensive if they aren't
3123 taken and multiple registers have been stacked. */
3124 if (iscond && arm_tune_strongarm)
3126 /* Conditional return when just the LR is stored is a simple
3127 conditional-load instruction, that's not expensive. */
3128 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3129 return 0;
3131 if (flag_pic
3132 && arm_pic_register != INVALID_REGNUM
3133 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3134 return 0;
3137 /* If there are saved registers but the LR isn't saved, then we need
3138 two instructions for the return. */
3139 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3140 return 0;
3142 /* Can't be done if any of the VFP regs are pushed,
3143 since this also requires an insn. */
3144 if (TARGET_HARD_FLOAT && TARGET_VFP)
3145 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3146 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3147 return 0;
3149 if (TARGET_REALLY_IWMMXT)
3150 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3151 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3152 return 0;
3154 return 1;
3157 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3158 shrink-wrapping if possible. This is the case if we need to emit a
3159 prologue, which we can test by looking at the offsets. */
3160 bool
3161 use_simple_return_p (void)
3163 arm_stack_offsets *offsets;
3165 offsets = arm_get_frame_offsets ();
3166 return offsets->outgoing_args != 0;
3169 /* Return TRUE if int I is a valid immediate ARM constant. */
3172 const_ok_for_arm (HOST_WIDE_INT i)
3174 int lowbit;
3176 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3177 be all zero, or all one. */
3178 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3179 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3180 != ((~(unsigned HOST_WIDE_INT) 0)
3181 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3182 return FALSE;
3184 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3186 /* Fast return for 0 and small values. We must do this for zero, since
3187 the code below can't handle that one case. */
3188 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3189 return TRUE;
3191 /* Get the number of trailing zeros. */
3192 lowbit = ffs((int) i) - 1;
3194 /* Only even shifts are allowed in ARM mode so round down to the
3195 nearest even number. */
3196 if (TARGET_ARM)
3197 lowbit &= ~1;
3199 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3200 return TRUE;
3202 if (TARGET_ARM)
3204 /* Allow rotated constants in ARM mode. */
3205 if (lowbit <= 4
3206 && ((i & ~0xc000003f) == 0
3207 || (i & ~0xf000000f) == 0
3208 || (i & ~0xfc000003) == 0))
3209 return TRUE;
3211 else
3213 HOST_WIDE_INT v;
3215 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3216 v = i & 0xff;
3217 v |= v << 16;
3218 if (i == v || i == (v | (v << 8)))
3219 return TRUE;
3221 /* Allow repeated pattern 0xXY00XY00. */
3222 v = i & 0xff00;
3223 v |= v << 16;
3224 if (i == v)
3225 return TRUE;
3228 return FALSE;
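/* A few worked examples of the rules above (illustrative only):
     0x000000ff -> TRUE everywhere (plain 8-bit value);
     0x0003fc00 -> TRUE (0xff shifted to an even bit position);
     0xff000000 -> TRUE (0xff rotated right by 8);
     0x00120012 -> FALSE in ARM mode but TRUE in Thumb-2, via the replicated
                   0x00XY00XY pattern;
     0x00000101 -> FALSE in both modes, so it must be synthesized some other
                   way (e.g. two instructions, or movw where available).  */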
3231 /* Return true if I is a valid constant for the operation CODE. */
3233 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3235 if (const_ok_for_arm (i))
3236 return 1;
3238 switch (code)
3240 case SET:
3241 /* See if we can use movw. */
3242 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3243 return 1;
3244 else
3245 /* Otherwise, try mvn. */
3246 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3248 case PLUS:
3249 /* See if we can use addw or subw. */
3250 if (TARGET_THUMB2
3251 && ((i & 0xfffff000) == 0
3252 || ((-i) & 0xfffff000) == 0))
3253 return 1;
3254 /* else fall through. */
3256 case COMPARE:
3257 case EQ:
3258 case NE:
3259 case GT:
3260 case LE:
3261 case LT:
3262 case GE:
3263 case GEU:
3264 case LTU:
3265 case GTU:
3266 case LEU:
3267 case UNORDERED:
3268 case ORDERED:
3269 case UNEQ:
3270 case UNGE:
3271 case UNLT:
3272 case UNGT:
3273 case UNLE:
3274 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3276 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3277 case XOR:
3278 return 0;
3280 case IOR:
3281 if (TARGET_THUMB2)
3282 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3283 return 0;
3285 case AND:
3286 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3288 default:
3289 gcc_unreachable ();
3293 /* Return true if I is a valid di mode constant for the operation CODE. */
3295 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3297 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3298 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3299 rtx hi = GEN_INT (hi_val);
3300 rtx lo = GEN_INT (lo_val);
3302 if (TARGET_THUMB1)
3303 return 0;
3305 switch (code)
3307 case AND:
3308 case IOR:
3309 case XOR:
3310 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3311 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3312 case PLUS:
3313 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3315 default:
3316 return 0;
3320 /* Emit a sequence of insns to handle a large constant.
3321 CODE is the code of the operation required, it can be any of SET, PLUS,
3322 IOR, AND, XOR, MINUS;
3323 MODE is the mode in which the operation is being performed;
3324 VAL is the integer to operate on;
3325 SOURCE is the other operand (a register, or a null-pointer for SET);
3326 SUBTARGETS means it is safe to create scratch registers if that will
3327 either produce a simpler sequence, or we will want to cse the values.
3328 Return value is the number of insns emitted. */
3330 /* ??? Tweak this for thumb2. */
3332 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3333 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3335 rtx cond;
3337 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3338 cond = COND_EXEC_TEST (PATTERN (insn));
3339 else
3340 cond = NULL_RTX;
3342 if (subtargets || code == SET
3343 || (REG_P (target) && REG_P (source)
3344 && REGNO (target) != REGNO (source)))
3346 /* After arm_reorg has been called, we can't fix up expensive
3347 constants by pushing them into memory so we must synthesize
3348 them in-line, regardless of the cost. This is only likely to
3349 be more costly on chips that have load delay slots and we are
3350 compiling without running the scheduler (so no splitting
3351 occurred before the final instruction emission).
3353 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3355 if (!after_arm_reorg
3356 && !cond
3357 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3358 1, 0)
3359 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3360 + (code != SET))))
3362 if (code == SET)
3364 /* Currently SET is the only monadic value for CODE, all
3365 the rest are dyadic.  */
3366 if (TARGET_USE_MOVT)
3367 arm_emit_movpair (target, GEN_INT (val));
3368 else
3369 emit_set_insn (target, GEN_INT (val));
3371 return 1;
3373 else
3375 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3377 if (TARGET_USE_MOVT)
3378 arm_emit_movpair (temp, GEN_INT (val));
3379 else
3380 emit_set_insn (temp, GEN_INT (val));
3382 /* For MINUS, the value is subtracted from, since we never
3383 have subtraction of a constant. */
3384 if (code == MINUS)
3385 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3386 else
3387 emit_set_insn (target,
3388 gen_rtx_fmt_ee (code, mode, source, temp));
3389 return 2;
3394 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3398 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3399 ARM/THUMB2 immediates, and add up to VAL.
3400 The function return value gives the number of insns required.  */
3401 static int
3402 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3403 struct four_ints *return_sequence)
3405 int best_consecutive_zeros = 0;
3406 int i;
3407 int best_start = 0;
3408 int insns1, insns2;
3409 struct four_ints tmp_sequence;
3411 /* If we aren't targeting ARM, the best place to start is always at
3412 the bottom, otherwise look more closely. */
3413 if (TARGET_ARM)
3415 for (i = 0; i < 32; i += 2)
3417 int consecutive_zeros = 0;
3419 if (!(val & (3 << i)))
3421 while ((i < 32) && !(val & (3 << i)))
3423 consecutive_zeros += 2;
3424 i += 2;
3426 if (consecutive_zeros > best_consecutive_zeros)
3428 best_consecutive_zeros = consecutive_zeros;
3429 best_start = i - consecutive_zeros;
3431 i -= 2;
3436 /* So long as it won't require any more insns to do so, it's
3437 desirable to emit a small constant (in bits 0...9) in the last
3438 insn. This way there is more chance that it can be combined with
3439 a later addressing insn to form a pre-indexed load or store
3440 operation. Consider:
3442 *((volatile int *)0xe0000100) = 1;
3443 *((volatile int *)0xe0000110) = 2;
3445 We want this to wind up as:
3447 mov rA, #0xe0000000
3448 mov rB, #1
3449 str rB, [rA, #0x100]
3450 mov rB, #2
3451 str rB, [rA, #0x110]
3453 rather than having to synthesize both large constants from scratch.
3455 Therefore, we calculate how many insns would be required to emit
3456 the constant starting from `best_start', and also starting from
3457 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3458 yield a shorter sequence, we may as well use zero. */
3459 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3460 if (best_start != 0
3461 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3463 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3464 if (insns2 <= insns1)
3466 *return_sequence = tmp_sequence;
3467 insns1 = insns2;
3471 return insns1;
3474 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3475 static int
3476 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3477 struct four_ints *return_sequence, int i)
3479 int remainder = val & 0xffffffff;
3480 int insns = 0;
3482 /* Try and find a way of doing the job in either two or three
3483 instructions.
3485 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3486 location. We start at position I. This may be the MSB, or
3487 optimal_immediate_sequence may have positioned it at the largest block
3488 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3489 wrapping around to the top of the word when we drop off the bottom.
3490 In the worst case this code should produce no more than four insns.
3492 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3493 constants, shifted to any arbitrary location. We should always start
3494 at the MSB. */
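/* For example, in ARM state the value 0x0FF000FF splits into the two
   rotated 8-bit immediates 0x0FF00000 and 0x000000FF, so a SET of that
   value can be synthesized in two instructions. */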
3497 int end;
3498 unsigned int b1, b2, b3, b4;
3499 unsigned HOST_WIDE_INT result;
3500 int loc;
3502 gcc_assert (insns < 4);
3504 if (i <= 0)
3505 i += 32;
3507 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3508 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3510 loc = i;
3511 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3512 /* We can use addw/subw for the last 12 bits. */
3513 result = remainder;
3514 else
3516 /* Use an 8-bit shifted/rotated immediate. */
3517 end = i - 8;
3518 if (end < 0)
3519 end += 32;
3520 result = remainder & ((0x0ff << end)
3521 | ((i < end) ? (0xff >> (32 - end))
3522 : 0));
3523 i -= 8;
3526 else
3528 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3529 arbitrary shifts. */
3530 i -= TARGET_ARM ? 2 : 1;
3531 continue;
3534 /* Next, see if we can do a better job with a thumb2 replicated
3535 constant.
3537 We do it this way around to catch the cases like 0x01F001E0 where
3538 two 8-bit immediates would work, but a replicated constant would
3539 make it worse.
3541 TODO: 16-bit constants that don't clear all the bits, but still win.
3542 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
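/* For example, with remainder 0x12121216 the 32-bit replicated constant
   0x12121212 matches three of the four bytes, leaving only 0x00000004
   for one further instruction: two insns in total, where plain 8-bit
   immediates would have needed four. */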
3543 if (TARGET_THUMB2)
3545 b1 = (remainder & 0xff000000) >> 24;
3546 b2 = (remainder & 0x00ff0000) >> 16;
3547 b3 = (remainder & 0x0000ff00) >> 8;
3548 b4 = remainder & 0xff;
3550 if (loc > 24)
3552 /* The 8-bit immediate already found clears b1 (and maybe b2),
3553 but must leave b3 and b4 alone. */
3555 /* First try to find a 32-bit replicated constant that clears
3556 almost everything. We can assume that we can't do it in one,
3557 or else we wouldn't be here. */
3558 unsigned int tmp = b1 & b2 & b3 & b4;
3559 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3560 + (tmp << 24);
3561 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3562 + (tmp == b3) + (tmp == b4);
3563 if (tmp
3564 && (matching_bytes >= 3
3565 || (matching_bytes == 2
3566 && const_ok_for_op (remainder & ~tmp2, code))))
3568 /* At least 3 of the bytes match, and the fourth has at
3569 least as many bits set, or two of the bytes match
3570 and it will only require one more insn to finish. */
3571 result = tmp2;
3572 i = tmp != b1 ? 32
3573 : tmp != b2 ? 24
3574 : tmp != b3 ? 16
3575 : 8;
3578 /* Second, try to find a 16-bit replicated constant that can
3579 leave three of the bytes clear. If b2 or b4 is already
3580 zero, then we can. If the 8-bit from above would not
3581 clear b2 anyway, then we still win. */
3582 else if (b1 == b3 && (!b2 || !b4
3583 || (remainder & 0x00ff0000 & ~result)))
3585 result = remainder & 0xff00ff00;
3586 i = 24;
3589 else if (loc > 16)
3591 /* The 8-bit immediate already found clears b2 (and maybe b3)
3592 and we don't get here unless b1 is already clear, but it will
3593 leave b4 unchanged. */
3595 /* If we can clear b2 and b4 at once, then we win, since the
3596 8-bits couldn't possibly reach that far. */
3597 if (b2 == b4)
3599 result = remainder & 0x00ff00ff;
3600 i = 16;
3605 return_sequence->i[insns++] = result;
3606 remainder &= ~result;
3608 if (code == SET || code == MINUS)
3609 code = PLUS;
3611 while (remainder);
3613 return insns;
3616 /* Emit an instruction with the indicated PATTERN. If COND is
3617 non-NULL, conditionalize the execution of the instruction on COND
3618 being true. */
3620 static void
3621 emit_constant_insn (rtx cond, rtx pattern)
3623 if (cond)
3624 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3625 emit_insn (pattern);
3628 /* As above, but extra parameter GENERATE which, if clear, suppresses
3629 RTL generation. */
3631 static int
3632 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3633 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3634 int generate)
3636 int can_invert = 0;
3637 int can_negate = 0;
3638 int final_invert = 0;
3639 int i;
3640 int set_sign_bit_copies = 0;
3641 int clear_sign_bit_copies = 0;
3642 int clear_zero_bit_copies = 0;
3643 int set_zero_bit_copies = 0;
3644 int insns = 0, neg_insns, inv_insns;
3645 unsigned HOST_WIDE_INT temp1, temp2;
3646 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3647 struct four_ints *immediates;
3648 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3650 /* Find out which operations are safe for a given CODE. Also do a quick
3651 check for degenerate cases; these can occur when DImode operations
3652 are split. */
3653 switch (code)
3655 case SET:
3656 can_invert = 1;
3657 break;
3659 case PLUS:
3660 can_negate = 1;
3661 break;
3663 case IOR:
3664 if (remainder == 0xffffffff)
3666 if (generate)
3667 emit_constant_insn (cond,
3668 gen_rtx_SET (VOIDmode, target,
3669 GEN_INT (ARM_SIGN_EXTEND (val))));
3670 return 1;
3673 if (remainder == 0)
3675 if (reload_completed && rtx_equal_p (target, source))
3676 return 0;
3678 if (generate)
3679 emit_constant_insn (cond,
3680 gen_rtx_SET (VOIDmode, target, source));
3681 return 1;
3683 break;
3685 case AND:
3686 if (remainder == 0)
3688 if (generate)
3689 emit_constant_insn (cond,
3690 gen_rtx_SET (VOIDmode, target, const0_rtx));
3691 return 1;
3693 if (remainder == 0xffffffff)
3695 if (reload_completed && rtx_equal_p (target, source))
3696 return 0;
3697 if (generate)
3698 emit_constant_insn (cond,
3699 gen_rtx_SET (VOIDmode, target, source));
3700 return 1;
3702 can_invert = 1;
3703 break;
3705 case XOR:
3706 if (remainder == 0)
3708 if (reload_completed && rtx_equal_p (target, source))
3709 return 0;
3710 if (generate)
3711 emit_constant_insn (cond,
3712 gen_rtx_SET (VOIDmode, target, source));
3713 return 1;
3716 if (remainder == 0xffffffff)
3718 if (generate)
3719 emit_constant_insn (cond,
3720 gen_rtx_SET (VOIDmode, target,
3721 gen_rtx_NOT (mode, source)));
3722 return 1;
3724 final_invert = 1;
3725 break;
3727 case MINUS:
3728 /* We treat MINUS as (val - source), since (source - val) is always
3729 passed as (source + (-val)). */
3730 if (remainder == 0)
3732 if (generate)
3733 emit_constant_insn (cond,
3734 gen_rtx_SET (VOIDmode, target,
3735 gen_rtx_NEG (mode, source)));
3736 return 1;
3738 if (const_ok_for_arm (val))
3740 if (generate)
3741 emit_constant_insn (cond,
3742 gen_rtx_SET (VOIDmode, target,
3743 gen_rtx_MINUS (mode, GEN_INT (val),
3744 source)));
3745 return 1;
3748 break;
3750 default:
3751 gcc_unreachable ();
3754 /* If we can do it in one insn get out quickly. */
3755 if (const_ok_for_op (val, code))
3757 if (generate)
3758 emit_constant_insn (cond,
3759 gen_rtx_SET (VOIDmode, target,
3760 (source
3761 ? gen_rtx_fmt_ee (code, mode, source,
3762 GEN_INT (val))
3763 : GEN_INT (val))));
3764 return 1;
3767 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3768 insn. */
3769 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3770 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3772 if (generate)
3774 if (mode == SImode && i == 16)
3775 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3776 smaller insn. */
3777 emit_constant_insn (cond,
3778 gen_zero_extendhisi2
3779 (target, gen_lowpart (HImode, source)));
3780 else
3781 /* Extz only supports SImode, but we can coerce the operands
3782 into that mode. */
3783 emit_constant_insn (cond,
3784 gen_extzv_t2 (gen_lowpart (SImode, target),
3785 gen_lowpart (SImode, source),
3786 GEN_INT (i), const0_rtx));
3789 return 1;
3792 /* Calculate a few attributes that may be useful for specific
3793 optimizations. */
3794 /* Count number of leading zeros. */
3795 for (i = 31; i >= 0; i--)
3797 if ((remainder & (1 << i)) == 0)
3798 clear_sign_bit_copies++;
3799 else
3800 break;
3803 /* Count number of leading 1's. */
3804 for (i = 31; i >= 0; i--)
3806 if ((remainder & (1 << i)) != 0)
3807 set_sign_bit_copies++;
3808 else
3809 break;
3812 /* Count number of trailing zeros. */
3813 for (i = 0; i <= 31; i++)
3815 if ((remainder & (1 << i)) == 0)
3816 clear_zero_bit_copies++;
3817 else
3818 break;
3821 /* Count number of trailing 1's. */
3822 for (i = 0; i <= 31; i++)
3824 if ((remainder & (1 << i)) != 0)
3825 set_zero_bit_copies++;
3826 else
3827 break;
3830 switch (code)
3832 case SET:
3833 /* See if we can do this by sign_extending a constant that is known
3834 to be negative. This is a good way of doing it, since the shift
3835 may well merge into a subsequent insn. */
3836 if (set_sign_bit_copies > 1)
3838 if (const_ok_for_arm
3839 (temp1 = ARM_SIGN_EXTEND (remainder
3840 << (set_sign_bit_copies - 1))))
3842 if (generate)
3844 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3845 emit_constant_insn (cond,
3846 gen_rtx_SET (VOIDmode, new_src,
3847 GEN_INT (temp1)));
3848 emit_constant_insn (cond,
3849 gen_ashrsi3 (target, new_src,
3850 GEN_INT (set_sign_bit_copies - 1)));
3852 return 2;
3854 /* For an inverted constant, we will need to set the low bits,
3855 these will be shifted out of harm's way. */
3856 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3857 if (const_ok_for_arm (~temp1))
3859 if (generate)
3861 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3862 emit_constant_insn (cond,
3863 gen_rtx_SET (VOIDmode, new_src,
3864 GEN_INT (temp1)));
3865 emit_constant_insn (cond,
3866 gen_ashrsi3 (target, new_src,
3867 GEN_INT (set_sign_bit_copies - 1)));
3869 return 2;
3873 /* See if we can calculate the value as the difference between two
3874 valid immediates. */
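/* For example, 0x17fffff8 is not a valid immediate, but both 0x18000000
   and 8 are, so the value can be formed as 0x18000000 - 8: load
   0x18000000 into a register and then subtract 8 (two insns). */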
3875 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3877 int topshift = clear_sign_bit_copies & ~1;
3879 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3880 & (0xff000000 >> topshift));
3882 /* If temp1 is zero, then that means the 9 most significant
3883 bits of remainder were 1 and we've caused it to overflow.
3884 When topshift is 0 we don't need to do anything since we
3885 can borrow from 'bit 32'. */
3886 if (temp1 == 0 && topshift != 0)
3887 temp1 = 0x80000000 >> (topshift - 1);
3889 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3891 if (const_ok_for_arm (temp2))
3893 if (generate)
3895 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3896 emit_constant_insn (cond,
3897 gen_rtx_SET (VOIDmode, new_src,
3898 GEN_INT (temp1)));
3899 emit_constant_insn (cond,
3900 gen_addsi3 (target, new_src,
3901 GEN_INT (-temp2)));
3904 return 2;
3908 /* See if we can generate this by setting the bottom (or the top)
3909 16 bits, and then shifting these into the other half of the
3910 word. We only look for the simplest cases, to do more would cost
3911 too much. Be careful, however, not to generate this when the
3912 alternative would take fewer insns. */
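/* For example, 0x01210121 is built by synthesizing 0x0121 (recursively,
   in two instructions) and then ORring that value with itself shifted
   left by 16, for three instructions overall. */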
3913 if (val & 0xffff0000)
3915 temp1 = remainder & 0xffff0000;
3916 temp2 = remainder & 0x0000ffff;
3918 /* Overlaps outside this range are best done using other methods. */
3919 for (i = 9; i < 24; i++)
3921 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3922 && !const_ok_for_arm (temp2))
3924 rtx new_src = (subtargets
3925 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3926 : target);
3927 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3928 source, subtargets, generate);
3929 source = new_src;
3930 if (generate)
3931 emit_constant_insn
3932 (cond,
3933 gen_rtx_SET
3934 (VOIDmode, target,
3935 gen_rtx_IOR (mode,
3936 gen_rtx_ASHIFT (mode, source,
3937 GEN_INT (i)),
3938 source)));
3939 return insns + 1;
3943 /* Don't duplicate cases already considered. */
3944 for (i = 17; i < 24; i++)
3946 if (((temp1 | (temp1 >> i)) == remainder)
3947 && !const_ok_for_arm (temp1))
3949 rtx new_src = (subtargets
3950 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3951 : target);
3952 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3953 source, subtargets, generate);
3954 source = new_src;
3955 if (generate)
3956 emit_constant_insn
3957 (cond,
3958 gen_rtx_SET (VOIDmode, target,
3959 gen_rtx_IOR
3960 (mode,
3961 gen_rtx_LSHIFTRT (mode, source,
3962 GEN_INT (i)),
3963 source)));
3964 return insns + 1;
3968 break;
3970 case IOR:
3971 case XOR:
3972 /* If we have IOR or XOR, and the constant can be loaded in a
3973 single instruction, and we can find a temporary to put it in,
3974 then this can be done in two instructions instead of 3-4. */
3975 if (subtargets
3976 /* TARGET can't be NULL if SUBTARGETS is 0 */
3977 || (reload_completed && !reg_mentioned_p (target, source)))
3979 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3981 if (generate)
3983 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3985 emit_constant_insn (cond,
3986 gen_rtx_SET (VOIDmode, sub,
3987 GEN_INT (val)));
3988 emit_constant_insn (cond,
3989 gen_rtx_SET (VOIDmode, target,
3990 gen_rtx_fmt_ee (code, mode,
3991 source, sub)));
3993 return 2;
3997 if (code == XOR)
3998 break;
4000 /* Convert.
4001 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4002 and the remainder 0s for e.g. 0xfff00000)
4003 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4005 This can be done in 2 instructions by using shifts with mov or mvn.
4006 e.g. for
4007 x = x | 0xfff00000;
4008 we generate.
4009 mvn r0, r0, asl #12
4010 mvn r0, r0, lsr #12 */
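/* This works because the first MVN computes ~(y << n) with y's top n
   bits shifted out; the logical right shift in the second MVN refills
   those n bit positions with zeros, and the final complement turns them
   into ones while restoring the remaining bits of y, which is exactly
   y | <n leading ones>. */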
4011 if (set_sign_bit_copies > 8
4012 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4014 if (generate)
4016 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4017 rtx shift = GEN_INT (set_sign_bit_copies);
4019 emit_constant_insn
4020 (cond,
4021 gen_rtx_SET (VOIDmode, sub,
4022 gen_rtx_NOT (mode,
4023 gen_rtx_ASHIFT (mode,
4024 source,
4025 shift))));
4026 emit_constant_insn
4027 (cond,
4028 gen_rtx_SET (VOIDmode, target,
4029 gen_rtx_NOT (mode,
4030 gen_rtx_LSHIFTRT (mode, sub,
4031 shift))));
4033 return 2;
4036 /* Convert
4037 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4039 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4041 E.g. for r0 = r0 | 0xfff
4042 mvn r0, r0, lsr #12
4043 mvn r0, r0, asl #12
4046 if (set_zero_bit_copies > 8
4047 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4049 if (generate)
4051 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4052 rtx shift = GEN_INT (set_zero_bit_copies);
4054 emit_constant_insn
4055 (cond,
4056 gen_rtx_SET (VOIDmode, sub,
4057 gen_rtx_NOT (mode,
4058 gen_rtx_LSHIFTRT (mode,
4059 source,
4060 shift))));
4061 emit_constant_insn
4062 (cond,
4063 gen_rtx_SET (VOIDmode, target,
4064 gen_rtx_NOT (mode,
4065 gen_rtx_ASHIFT (mode, sub,
4066 shift))));
4068 return 2;
4071 /* This will never be reached for Thumb2 because orn is a valid
4072 instruction. This is for Thumb1 and the ARM 32 bit cases.
4074 x = y | constant (such that ~constant is a valid constant)
4075 Transform this to
4076 x = ~(~y & ~constant).
4078 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4080 if (generate)
4082 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4083 emit_constant_insn (cond,
4084 gen_rtx_SET (VOIDmode, sub,
4085 gen_rtx_NOT (mode, source)));
4086 source = sub;
4087 if (subtargets)
4088 sub = gen_reg_rtx (mode);
4089 emit_constant_insn (cond,
4090 gen_rtx_SET (VOIDmode, sub,
4091 gen_rtx_AND (mode, source,
4092 GEN_INT (temp1))));
4093 emit_constant_insn (cond,
4094 gen_rtx_SET (VOIDmode, target,
4095 gen_rtx_NOT (mode, sub)));
4097 return 3;
4099 break;
4101 case AND:
4102 /* See if two shifts will do 2 or more insns' worth of work. */
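/* For example, on a target without Thumb-2 (so no UBFX), "x & 0x00007fff"
   becomes a left shift by 17 followed by a logical right shift by 17
   (two insns), whereas clearing the same bits with BIC immediates would
   take three. */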
4103 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4105 HOST_WIDE_INT shift_mask = ((0xffffffff
4106 << (32 - clear_sign_bit_copies))
4107 & 0xffffffff);
4109 if ((remainder | shift_mask) != 0xffffffff)
4111 if (generate)
4113 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4114 insns = arm_gen_constant (AND, mode, cond,
4115 remainder | shift_mask,
4116 new_src, source, subtargets, 1);
4117 source = new_src;
4119 else
4121 rtx targ = subtargets ? NULL_RTX : target;
4122 insns = arm_gen_constant (AND, mode, cond,
4123 remainder | shift_mask,
4124 targ, source, subtargets, 0);
4128 if (generate)
4130 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4131 rtx shift = GEN_INT (clear_sign_bit_copies);
4133 emit_insn (gen_ashlsi3 (new_src, source, shift));
4134 emit_insn (gen_lshrsi3 (target, new_src, shift));
4137 return insns + 2;
4140 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4142 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4144 if ((remainder | shift_mask) != 0xffffffff)
4146 if (generate)
4148 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4150 insns = arm_gen_constant (AND, mode, cond,
4151 remainder | shift_mask,
4152 new_src, source, subtargets, 1);
4153 source = new_src;
4155 else
4157 rtx targ = subtargets ? NULL_RTX : target;
4159 insns = arm_gen_constant (AND, mode, cond,
4160 remainder | shift_mask,
4161 targ, source, subtargets, 0);
4165 if (generate)
4167 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4168 rtx shift = GEN_INT (clear_zero_bit_copies);
4170 emit_insn (gen_lshrsi3 (new_src, source, shift));
4171 emit_insn (gen_ashlsi3 (target, new_src, shift));
4174 return insns + 2;
4177 break;
4179 default:
4180 break;
4183 /* Calculate what the instruction sequences would be if we generated it
4184 normally, negated, or inverted. */
4185 if (code == AND)
4186 /* AND cannot be split into multiple insns, so invert and use BIC. */
4187 insns = 99;
4188 else
4189 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4191 if (can_negate)
4192 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4193 &neg_immediates);
4194 else
4195 neg_insns = 99;
4197 if (can_invert || final_invert)
4198 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4199 &inv_immediates);
4200 else
4201 inv_insns = 99;
4203 immediates = &pos_immediates;
4205 /* Is the negated immediate sequence more efficient? */
4206 if (neg_insns < insns && neg_insns <= inv_insns)
4208 insns = neg_insns;
4209 immediates = &neg_immediates;
4211 else
4212 can_negate = 0;
4214 /* Is the inverted immediate sequence more efficient?
4215 We must allow for an extra NOT instruction for XOR operations, although
4216 there is some chance that the final 'mvn' will get optimized later. */
4217 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4219 insns = inv_insns;
4220 immediates = &inv_immediates;
4222 else
4224 can_invert = 0;
4225 final_invert = 0;
4228 /* Now output the chosen sequence as instructions. */
4229 if (generate)
4231 for (i = 0; i < insns; i++)
4233 rtx new_src, temp1_rtx;
4235 temp1 = immediates->i[i];
4237 if (code == SET || code == MINUS)
4238 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4239 else if ((final_invert || i < (insns - 1)) && subtargets)
4240 new_src = gen_reg_rtx (mode);
4241 else
4242 new_src = target;
4244 if (can_invert)
4245 temp1 = ~temp1;
4246 else if (can_negate)
4247 temp1 = -temp1;
4249 temp1 = trunc_int_for_mode (temp1, mode);
4250 temp1_rtx = GEN_INT (temp1);
4252 if (code == SET)
4254 else if (code == MINUS)
4255 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4256 else
4257 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4259 emit_constant_insn (cond,
4260 gen_rtx_SET (VOIDmode, new_src,
4261 temp1_rtx));
4262 source = new_src;
4264 if (code == SET)
4266 can_negate = can_invert;
4267 can_invert = 0;
4268 code = PLUS;
4270 else if (code == MINUS)
4271 code = PLUS;
4275 if (final_invert)
4277 if (generate)
4278 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4279 gen_rtx_NOT (mode, source)));
4280 insns++;
4283 return insns;
4286 /* Canonicalize a comparison so that we are more likely to recognize it.
4287 This can be done for a few constant compares, where we can make the
4288 immediate value easier to load. */
4290 static void
4291 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4292 bool op0_preserve_value)
4294 enum machine_mode mode;
4295 unsigned HOST_WIDE_INT i, maxval;
4297 mode = GET_MODE (*op0);
4298 if (mode == VOIDmode)
4299 mode = GET_MODE (*op1);
4301 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4303 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4304 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4305 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4306 for GTU/LEU in Thumb mode. */
4307 if (mode == DImode)
4309 rtx tem;
4311 if (*code == GT || *code == LE
4312 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4314 /* Missing comparison. First try to use an available
4315 comparison. */
4316 if (CONST_INT_P (*op1))
4318 i = INTVAL (*op1);
4319 switch (*code)
4321 case GT:
4322 case LE:
4323 if (i != maxval
4324 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4326 *op1 = GEN_INT (i + 1);
4327 *code = *code == GT ? GE : LT;
4328 return;
4330 break;
4331 case GTU:
4332 case LEU:
4333 if (i != ~((unsigned HOST_WIDE_INT) 0)
4334 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4336 *op1 = GEN_INT (i + 1);
4337 *code = *code == GTU ? GEU : LTU;
4338 return;
4340 break;
4341 default:
4342 gcc_unreachable ();
4346 /* If that did not work, reverse the condition. */
4347 if (!op0_preserve_value)
4349 tem = *op0;
4350 *op0 = *op1;
4351 *op1 = tem;
4352 *code = (int)swap_condition ((enum rtx_code)*code);
4355 return;
4358 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4359 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4360 to facilitate possible combining with a cmp into 'ands'. */
4361 if (mode == SImode
4362 && GET_CODE (*op0) == ZERO_EXTEND
4363 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4364 && GET_MODE (XEXP (*op0, 0)) == QImode
4365 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4366 && subreg_lowpart_p (XEXP (*op0, 0))
4367 && *op1 == const0_rtx)
4368 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4369 GEN_INT (255));
4371 /* Comparisons smaller than DImode. Only adjust comparisons against
4372 an out-of-range constant. */
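/* For example, "x > 0xfff" is rewritten as "x >= 0x1000", since 0xfff is
   not a valid ARM immediate but 0x1000 is. */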
4373 if (!CONST_INT_P (*op1)
4374 || const_ok_for_arm (INTVAL (*op1))
4375 || const_ok_for_arm (- INTVAL (*op1)))
4376 return;
4378 i = INTVAL (*op1);
4380 switch (*code)
4382 case EQ:
4383 case NE:
4384 return;
4386 case GT:
4387 case LE:
4388 if (i != maxval
4389 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4391 *op1 = GEN_INT (i + 1);
4392 *code = *code == GT ? GE : LT;
4393 return;
4395 break;
4397 case GE:
4398 case LT:
4399 if (i != ~maxval
4400 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4402 *op1 = GEN_INT (i - 1);
4403 *code = *code == GE ? GT : LE;
4404 return;
4406 break;
4408 case GTU:
4409 case LEU:
4410 if (i != ~((unsigned HOST_WIDE_INT) 0)
4411 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4413 *op1 = GEN_INT (i + 1);
4414 *code = *code == GTU ? GEU : LTU;
4415 return;
4417 break;
4419 case GEU:
4420 case LTU:
4421 if (i != 0
4422 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4424 *op1 = GEN_INT (i - 1);
4425 *code = *code == GEU ? GTU : LEU;
4426 return;
4428 break;
4430 default:
4431 gcc_unreachable ();
4436 /* Define how to find the value returned by a function. */
4438 static rtx
4439 arm_function_value(const_tree type, const_tree func,
4440 bool outgoing ATTRIBUTE_UNUSED)
4442 enum machine_mode mode;
4443 int unsignedp ATTRIBUTE_UNUSED;
4444 rtx r ATTRIBUTE_UNUSED;
4446 mode = TYPE_MODE (type);
4448 if (TARGET_AAPCS_BASED)
4449 return aapcs_allocate_return_reg (mode, type, func);
4451 /* Promote integer types. */
4452 if (INTEGRAL_TYPE_P (type))
4453 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4455 /* Promotes small structs returned in a register to full-word size
4456 for big-endian AAPCS. */
4457 if (arm_return_in_msb (type))
4459 HOST_WIDE_INT size = int_size_in_bytes (type);
4460 if (size % UNITS_PER_WORD != 0)
4462 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4463 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4467 return arm_libcall_value_1 (mode);
4470 /* libcall hashtable helpers. */
4472 struct libcall_hasher : typed_noop_remove <rtx_def>
4474 typedef rtx_def value_type;
4475 typedef rtx_def compare_type;
4476 static inline hashval_t hash (const value_type *);
4477 static inline bool equal (const value_type *, const compare_type *);
4478 static inline void remove (value_type *);
4481 inline bool
4482 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4484 return rtx_equal_p (p1, p2);
4487 inline hashval_t
4488 libcall_hasher::hash (const value_type *p1)
4490 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4493 typedef hash_table <libcall_hasher> libcall_table_type;
4495 static void
4496 add_libcall (libcall_table_type htab, rtx libcall)
4498 *htab.find_slot (libcall, INSERT) = libcall;
4501 static bool
4502 arm_libcall_uses_aapcs_base (const_rtx libcall)
4504 static bool init_done = false;
4505 static libcall_table_type libcall_htab;
4507 if (!init_done)
4509 init_done = true;
4511 libcall_htab.create (31);
4512 add_libcall (libcall_htab,
4513 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4514 add_libcall (libcall_htab,
4515 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4516 add_libcall (libcall_htab,
4517 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4518 add_libcall (libcall_htab,
4519 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4521 add_libcall (libcall_htab,
4522 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4523 add_libcall (libcall_htab,
4524 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4525 add_libcall (libcall_htab,
4526 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4527 add_libcall (libcall_htab,
4528 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4530 add_libcall (libcall_htab,
4531 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4532 add_libcall (libcall_htab,
4533 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4534 add_libcall (libcall_htab,
4535 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4536 add_libcall (libcall_htab,
4537 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4538 add_libcall (libcall_htab,
4539 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4540 add_libcall (libcall_htab,
4541 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4542 add_libcall (libcall_htab,
4543 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4544 add_libcall (libcall_htab,
4545 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4547 /* Values from double-precision helper functions are returned in core
4548 registers if the selected core only supports single-precision
4549 arithmetic, even if we are using the hard-float ABI. The same is
4550 true for single-precision helpers, but we will never be using the
4551 hard-float ABI on a CPU which doesn't support single-precision
4552 operations in hardware. */
4553 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4554 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4555 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4556 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4557 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4558 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4559 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4560 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4561 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4562 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4563 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4564 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4565 SFmode));
4566 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4567 DFmode));
4570 return libcall && libcall_htab.find (libcall) != NULL;
4573 static rtx
4574 arm_libcall_value_1 (enum machine_mode mode)
4576 if (TARGET_AAPCS_BASED)
4577 return aapcs_libcall_value (mode);
4578 else if (TARGET_IWMMXT_ABI
4579 && arm_vector_mode_supported_p (mode))
4580 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4581 else
4582 return gen_rtx_REG (mode, ARG_REGISTER (1));
4585 /* Define how to find the value returned by a library function
4586 assuming the value has mode MODE. */
4588 static rtx
4589 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4591 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4592 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4594 /* The following libcalls return their result in integer registers,
4595 even though they return a floating point value. */
4596 if (arm_libcall_uses_aapcs_base (libcall))
4597 return gen_rtx_REG (mode, ARG_REGISTER(1));
4601 return arm_libcall_value_1 (mode);
4604 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4606 static bool
4607 arm_function_value_regno_p (const unsigned int regno)
4609 if (regno == ARG_REGISTER (1)
4610 || (TARGET_32BIT
4611 && TARGET_AAPCS_BASED
4612 && TARGET_VFP
4613 && TARGET_HARD_FLOAT
4614 && regno == FIRST_VFP_REGNUM)
4615 || (TARGET_IWMMXT_ABI
4616 && regno == FIRST_IWMMXT_REGNUM))
4617 return true;
4619 return false;
4622 /* Determine the amount of memory needed to store the possible return
4623 registers of an untyped call. */
4625 arm_apply_result_size (void)
4627 int size = 16;
4629 if (TARGET_32BIT)
4631 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4632 size += 32;
4633 if (TARGET_IWMMXT_ABI)
4634 size += 8;
4637 return size;
4640 /* Decide whether TYPE should be returned in memory (true)
4641 or in a register (false). FNTYPE is the type of the function making
4642 the call. */
4643 static bool
4644 arm_return_in_memory (const_tree type, const_tree fntype)
4646 HOST_WIDE_INT size;
4648 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4650 if (TARGET_AAPCS_BASED)
4652 /* Simple, non-aggregate types (i.e. not including vectors and
4653 complex) are always returned in a register (or registers).
4654 We don't care about which register here, so we can short-cut
4655 some of the detail. */
4656 if (!AGGREGATE_TYPE_P (type)
4657 && TREE_CODE (type) != VECTOR_TYPE
4658 && TREE_CODE (type) != COMPLEX_TYPE)
4659 return false;
4661 /* Any return value that is no larger than one word can be
4662 returned in r0. */
4663 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4664 return false;
4666 /* Check any available co-processors to see if they accept the
4667 type as a register candidate (VFP, for example, can return
4668 some aggregates in consecutive registers). These aren't
4669 available if the call is variadic. */
4670 if (aapcs_select_return_coproc (type, fntype) >= 0)
4671 return false;
4673 /* Vector values should be returned using ARM registers, not
4674 memory (unless they're over 16 bytes, which will break since
4675 we only have four call-clobbered registers to play with). */
4676 if (TREE_CODE (type) == VECTOR_TYPE)
4677 return (size < 0 || size > (4 * UNITS_PER_WORD));
4679 /* The rest go in memory. */
4680 return true;
4683 if (TREE_CODE (type) == VECTOR_TYPE)
4684 return (size < 0 || size > (4 * UNITS_PER_WORD));
4686 if (!AGGREGATE_TYPE_P (type) &&
4687 (TREE_CODE (type) != VECTOR_TYPE))
4688 /* All simple types are returned in registers. */
4689 return false;
4691 if (arm_abi != ARM_ABI_APCS)
4693 /* ATPCS and later return aggregate types in memory only if they are
4694 larger than a word (or are variable size). */
4695 return (size < 0 || size > UNITS_PER_WORD);
4698 /* For the arm-wince targets we choose to be compatible with Microsoft's
4699 ARM and Thumb compilers, which always return aggregates in memory. */
4700 #ifndef ARM_WINCE
4701 /* All structures/unions bigger than one word are returned in memory.
4702 Also catch the case where int_size_in_bytes returns -1. In this case
4703 the aggregate is either huge or of variable size, and in either case
4704 we will want to return it via memory and not in a register. */
4705 if (size < 0 || size > UNITS_PER_WORD)
4706 return true;
4708 if (TREE_CODE (type) == RECORD_TYPE)
4710 tree field;
4712 /* For a struct the APCS says that we only return in a register
4713 if the type is 'integer like' and every addressable element
4714 has an offset of zero. For practical purposes this means
4715 that the structure can have at most one non bit-field element
4716 and that this element must be the first one in the structure. */
4718 /* Find the first field, ignoring non FIELD_DECL things which will
4719 have been created by C++. */
4720 for (field = TYPE_FIELDS (type);
4721 field && TREE_CODE (field) != FIELD_DECL;
4722 field = DECL_CHAIN (field))
4723 continue;
4725 if (field == NULL)
4726 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4728 /* Check that the first field is valid for returning in a register. */
4730 /* ... Floats are not allowed */
4731 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4732 return true;
4734 /* ... Aggregates that are not themselves valid for returning in
4735 a register are not allowed. */
4736 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4737 return true;
4739 /* Now check the remaining fields, if any. Only bitfields are allowed,
4740 since they are not addressable. */
4741 for (field = DECL_CHAIN (field);
4742 field;
4743 field = DECL_CHAIN (field))
4745 if (TREE_CODE (field) != FIELD_DECL)
4746 continue;
4748 if (!DECL_BIT_FIELD_TYPE (field))
4749 return true;
4752 return false;
4755 if (TREE_CODE (type) == UNION_TYPE)
4757 tree field;
4759 /* Unions can be returned in registers if every element is
4760 integral, or can be returned in an integer register. */
4761 for (field = TYPE_FIELDS (type);
4762 field;
4763 field = DECL_CHAIN (field))
4765 if (TREE_CODE (field) != FIELD_DECL)
4766 continue;
4768 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4769 return true;
4771 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4772 return true;
4775 return false;
4777 #endif /* not ARM_WINCE */
4779 /* Return all other types in memory. */
4780 return true;
4783 const struct pcs_attribute_arg
4785 const char *arg;
4786 enum arm_pcs value;
4787 } pcs_attribute_args[] =
4789 {"aapcs", ARM_PCS_AAPCS},
4790 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4791 #if 0
4792 /* We could recognize these, but changes would be needed elsewhere
4793 * to implement them. */
4794 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4795 {"atpcs", ARM_PCS_ATPCS},
4796 {"apcs", ARM_PCS_APCS},
4797 #endif
4798 {NULL, ARM_PCS_UNKNOWN}
4801 static enum arm_pcs
4802 arm_pcs_from_attribute (tree attr)
4804 const struct pcs_attribute_arg *ptr;
4805 const char *arg;
4807 /* Get the value of the argument. */
4808 if (TREE_VALUE (attr) == NULL_TREE
4809 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4810 return ARM_PCS_UNKNOWN;
4812 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4814 /* Check it against the list of known arguments. */
4815 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4816 if (streq (arg, ptr->arg))
4817 return ptr->value;
4819 /* An unrecognized PCS variant name. */
4820 return ARM_PCS_UNKNOWN;
4823 /* Get the PCS variant to use for this call. TYPE is the function's type
4824 specification, DECL is the specific declaration. DECL may be null if
4825 the call could be indirect or if this is a library call. */
4826 static enum arm_pcs
4827 arm_get_pcs_model (const_tree type, const_tree decl)
4829 bool user_convention = false;
4830 enum arm_pcs user_pcs = arm_pcs_default;
4831 tree attr;
4833 gcc_assert (type);
4835 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4836 if (attr)
4838 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4839 user_convention = true;
4842 if (TARGET_AAPCS_BASED)
4844 /* Detect varargs functions. These always use the base rules
4845 (no argument is ever a candidate for a co-processor
4846 register). */
4847 bool base_rules = stdarg_p (type);
4849 if (user_convention)
4851 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4852 sorry ("non-AAPCS derived PCS variant");
4853 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4854 error ("variadic functions must use the base AAPCS variant");
4857 if (base_rules)
4858 return ARM_PCS_AAPCS;
4859 else if (user_convention)
4860 return user_pcs;
4861 else if (decl && flag_unit_at_a_time)
4863 /* Local functions never leak outside this compilation unit,
4864 so we are free to use whatever conventions are
4865 appropriate. */
4866 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4867 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4868 if (i && i->local)
4869 return ARM_PCS_AAPCS_LOCAL;
4872 else if (user_convention && user_pcs != arm_pcs_default)
4873 sorry ("PCS variant");
4875 /* For everything else we use the target's default. */
4876 return arm_pcs_default;
4880 static void
4881 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4882 const_tree fntype ATTRIBUTE_UNUSED,
4883 rtx libcall ATTRIBUTE_UNUSED,
4884 const_tree fndecl ATTRIBUTE_UNUSED)
4886 /* Record the unallocated VFP registers. */
4887 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4888 pcum->aapcs_vfp_reg_alloc = 0;
4891 /* Walk down the type tree of TYPE counting consecutive base elements.
4892 If *MODEP is VOIDmode, then set it to the first valid floating point
4893 type. If a non-floating point type is found, or if a floating point
4894 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4895 otherwise return the count in the sub-tree. */
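/* For example, "struct { double x, y; }" gives a count of 2 with *MODEP
   set to DFmode, i.e. a homogeneous aggregate that the VFP PCS can pass
   or return in two consecutive double-precision registers. */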
4896 static int
4897 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4899 enum machine_mode mode;
4900 HOST_WIDE_INT size;
4902 switch (TREE_CODE (type))
4904 case REAL_TYPE:
4905 mode = TYPE_MODE (type);
4906 if (mode != DFmode && mode != SFmode)
4907 return -1;
4909 if (*modep == VOIDmode)
4910 *modep = mode;
4912 if (*modep == mode)
4913 return 1;
4915 break;
4917 case COMPLEX_TYPE:
4918 mode = TYPE_MODE (TREE_TYPE (type));
4919 if (mode != DFmode && mode != SFmode)
4920 return -1;
4922 if (*modep == VOIDmode)
4923 *modep = mode;
4925 if (*modep == mode)
4926 return 2;
4928 break;
4930 case VECTOR_TYPE:
4931 /* Use V2SImode and V4SImode as representatives of all 64-bit
4932 and 128-bit vector types, whether or not those modes are
4933 supported with the present options. */
4934 size = int_size_in_bytes (type);
4935 switch (size)
4937 case 8:
4938 mode = V2SImode;
4939 break;
4940 case 16:
4941 mode = V4SImode;
4942 break;
4943 default:
4944 return -1;
4947 if (*modep == VOIDmode)
4948 *modep = mode;
4950 /* Vector modes are considered to be opaque: two vectors are
4951 equivalent for the purposes of being homogeneous aggregates
4952 if they are the same size. */
4953 if (*modep == mode)
4954 return 1;
4956 break;
4958 case ARRAY_TYPE:
4960 int count;
4961 tree index = TYPE_DOMAIN (type);
4963 /* Can't handle incomplete types. */
4964 if (!COMPLETE_TYPE_P (type))
4965 return -1;
4967 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4968 if (count == -1
4969 || !index
4970 || !TYPE_MAX_VALUE (index)
4971 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
4972 || !TYPE_MIN_VALUE (index)
4973 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
4974 || count < 0)
4975 return -1;
4977 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
4978 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
4980 /* There must be no padding. */
4981 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
4982 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
4983 != count * GET_MODE_BITSIZE (*modep)))
4984 return -1;
4986 return count;
4989 case RECORD_TYPE:
4991 int count = 0;
4992 int sub_count;
4993 tree field;
4995 /* Can't handle incomplete types. */
4996 if (!COMPLETE_TYPE_P (type))
4997 return -1;
4999 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5001 if (TREE_CODE (field) != FIELD_DECL)
5002 continue;
5004 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5005 if (sub_count < 0)
5006 return -1;
5007 count += sub_count;
5010 /* There must be no padding. */
5011 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5012 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5013 != count * GET_MODE_BITSIZE (*modep)))
5014 return -1;
5016 return count;
5019 case UNION_TYPE:
5020 case QUAL_UNION_TYPE:
5022 /* These aren't very interesting except in a degenerate case. */
5023 int count = 0;
5024 int sub_count;
5025 tree field;
5027 /* Can't handle incomplete types. */
5028 if (!COMPLETE_TYPE_P (type))
5029 return -1;
5031 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5033 if (TREE_CODE (field) != FIELD_DECL)
5034 continue;
5036 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5037 if (sub_count < 0)
5038 return -1;
5039 count = count > sub_count ? count : sub_count;
5042 /* There must be no padding. */
5043 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5044 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5045 != count * GET_MODE_BITSIZE (*modep)))
5046 return -1;
5048 return count;
5051 default:
5052 break;
5055 return -1;
5058 /* Return true if PCS_VARIANT should use VFP registers. */
5059 static bool
5060 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5062 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5064 static bool seen_thumb1_vfp = false;
5066 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5068 sorry ("Thumb-1 hard-float VFP ABI");
5069 /* sorry() is not immediately fatal, so only display this once. */
5070 seen_thumb1_vfp = true;
5073 return true;
5076 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5077 return false;
5079 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5080 (TARGET_VFP_DOUBLE || !is_double));
5083 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5084 suitable for passing or returning in VFP registers for the PCS
5085 variant selected. If it is, then *BASE_MODE is updated to contain
5086 a machine mode describing each element of the argument's type and
5087 *COUNT to hold the number of such elements. */
5088 static bool
5089 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5090 enum machine_mode mode, const_tree type,
5091 enum machine_mode *base_mode, int *count)
5093 enum machine_mode new_mode = VOIDmode;
5095 /* If we have the type information, prefer that to working things
5096 out from the mode. */
5097 if (type)
5099 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5101 if (ag_count > 0 && ag_count <= 4)
5102 *count = ag_count;
5103 else
5104 return false;
5106 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5107 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5108 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5110 *count = 1;
5111 new_mode = mode;
5113 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5115 *count = 2;
5116 new_mode = (mode == DCmode ? DFmode : SFmode);
5118 else
5119 return false;
5122 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5123 return false;
5125 *base_mode = new_mode;
5126 return true;
5129 static bool
5130 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5131 enum machine_mode mode, const_tree type)
5133 int count ATTRIBUTE_UNUSED;
5134 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5136 if (!use_vfp_abi (pcs_variant, false))
5137 return false;
5138 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5139 &ag_mode, &count);
5142 static bool
5143 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5144 const_tree type)
5146 if (!use_vfp_abi (pcum->pcs_variant, false))
5147 return false;
5149 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5150 &pcum->aapcs_vfp_rmode,
5151 &pcum->aapcs_vfp_rcount);
5154 static bool
5155 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5156 const_tree type ATTRIBUTE_UNUSED)
5158 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5159 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5160 int regno;
5162 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5163 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5165 pcum->aapcs_vfp_reg_alloc = mask << regno;
5166 if (mode == BLKmode
5167 || (mode == TImode && ! TARGET_NEON)
5168 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5170 int i;
5171 int rcount = pcum->aapcs_vfp_rcount;
5172 int rshift = shift;
5173 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5174 rtx par;
5175 if (!TARGET_NEON)
5177 /* Avoid using unsupported vector modes. */
5178 if (rmode == V2SImode)
5179 rmode = DImode;
5180 else if (rmode == V4SImode)
5182 rmode = DImode;
5183 rcount *= 2;
5184 rshift /= 2;
5187 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5188 for (i = 0; i < rcount; i++)
5190 rtx tmp = gen_rtx_REG (rmode,
5191 FIRST_VFP_REGNUM + regno + i * rshift);
5192 tmp = gen_rtx_EXPR_LIST
5193 (VOIDmode, tmp,
5194 GEN_INT (i * GET_MODE_SIZE (rmode)));
5195 XVECEXP (par, 0, i) = tmp;
5198 pcum->aapcs_reg = par;
5200 else
5201 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5202 return true;
5204 return false;
5207 static rtx
5208 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5209 enum machine_mode mode,
5210 const_tree type ATTRIBUTE_UNUSED)
5212 if (!use_vfp_abi (pcs_variant, false))
5213 return NULL;
5215 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5217 int count;
5218 enum machine_mode ag_mode;
5219 int i;
5220 rtx par;
5221 int shift;
5223 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5224 &ag_mode, &count);
5226 if (!TARGET_NEON)
5228 if (ag_mode == V2SImode)
5229 ag_mode = DImode;
5230 else if (ag_mode == V4SImode)
5232 ag_mode = DImode;
5233 count *= 2;
5236 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5237 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5238 for (i = 0; i < count; i++)
5240 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5241 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5242 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5243 XVECEXP (par, 0, i) = tmp;
5246 return par;
5249 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5252 static void
5253 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5254 enum machine_mode mode ATTRIBUTE_UNUSED,
5255 const_tree type ATTRIBUTE_UNUSED)
5257 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5258 pcum->aapcs_vfp_reg_alloc = 0;
5259 return;
5262 #define AAPCS_CP(X) \
5264 aapcs_ ## X ## _cum_init, \
5265 aapcs_ ## X ## _is_call_candidate, \
5266 aapcs_ ## X ## _allocate, \
5267 aapcs_ ## X ## _is_return_candidate, \
5268 aapcs_ ## X ## _allocate_return_reg, \
5269 aapcs_ ## X ## _advance \
5272 /* Table of co-processors that can be used to pass arguments in
5273 registers. Ideally no argument should be a candidate for more than
5274 one co-processor table entry, but the table is processed in order
5275 and stops after the first match. If that entry then fails to put
5276 the argument into a co-processor register, the argument will go on
5277 the stack. */
5278 static struct
5280 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5281 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5283 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5284 BLKmode) is a candidate for this co-processor's registers; this
5285 function should ignore any position-dependent state in
5286 CUMULATIVE_ARGS and only use call-type dependent information. */
5287 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5289 /* Return true if the argument does get a co-processor register; it
5290 should set aapcs_reg to an RTX of the register allocated as is
5291 required for a return from FUNCTION_ARG. */
5292 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5294 /* Return true if a result of mode MODE (or type TYPE if MODE is
5295 BLKmode) can be returned in this co-processor's registers. */
5296 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5298 /* Allocate and return an RTX element to hold the return type of a
5299 call, this routine must not fail and will only be called if
5300 is_return_candidate returned true with the same parameters. */
5301 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5303 /* Finish processing this argument and prepare to start processing
5304 the next one. */
5305 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5306 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5308 AAPCS_CP(vfp)
5311 #undef AAPCS_CP
5313 static int
5314 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5315 const_tree type)
5317 int i;
5319 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5320 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5321 return i;
5323 return -1;
5326 static int
5327 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5329 /* We aren't passed a decl, so we can't check that a call is local.
5330 However, it isn't clear that that would be a win anyway, since it
5331 might limit some tail-calling opportunities. */
5332 enum arm_pcs pcs_variant;
5334 if (fntype)
5336 const_tree fndecl = NULL_TREE;
5338 if (TREE_CODE (fntype) == FUNCTION_DECL)
5340 fndecl = fntype;
5341 fntype = TREE_TYPE (fntype);
5344 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5346 else
5347 pcs_variant = arm_pcs_default;
5349 if (pcs_variant != ARM_PCS_AAPCS)
5351 int i;
5353 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5354 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5355 TYPE_MODE (type),
5356 type))
5357 return i;
5359 return -1;
5362 static rtx
5363 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5364 const_tree fntype)
5366 /* We aren't passed a decl, so we can't check that a call is local.
5367 However, it isn't clear that that would be a win anyway, since it
5368 might limit some tail-calling opportunities. */
5369 enum arm_pcs pcs_variant;
5370 int unsignedp ATTRIBUTE_UNUSED;
5372 if (fntype)
5374 const_tree fndecl = NULL_TREE;
5376 if (TREE_CODE (fntype) == FUNCTION_DECL)
5378 fndecl = fntype;
5379 fntype = TREE_TYPE (fntype);
5382 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5384 else
5385 pcs_variant = arm_pcs_default;
5387 /* Promote integer types. */
5388 if (type && INTEGRAL_TYPE_P (type))
5389 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5391 if (pcs_variant != ARM_PCS_AAPCS)
5393 int i;
5395 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5396 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5397 type))
5398 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5399 mode, type);
5402 /* Promotes small structs returned in a register to full-word size
5403 for big-endian AAPCS. */
5404 if (type && arm_return_in_msb (type))
5406 HOST_WIDE_INT size = int_size_in_bytes (type);
5407 if (size % UNITS_PER_WORD != 0)
5409 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5410 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5414 return gen_rtx_REG (mode, R0_REGNUM);
5417 static rtx
5418 aapcs_libcall_value (enum machine_mode mode)
5420 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5421 && GET_MODE_SIZE (mode) <= 4)
5422 mode = SImode;
5424 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5427 /* Lay out a function argument using the AAPCS rules. The rule
5428 numbers referred to here are those in the AAPCS. */
5429 static void
5430 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5431 const_tree type, bool named)
5433 int nregs, nregs2;
5434 int ncrn;
5436 /* We only need to do this once per argument. */
5437 if (pcum->aapcs_arg_processed)
5438 return;
5440 pcum->aapcs_arg_processed = true;
5442 /* Special case: if named is false then we are handling an incoming
5443 anonymous argument which is on the stack. */
5444 if (!named)
5445 return;
5447 /* Is this a potential co-processor register candidate? */
5448 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5450 int slot = aapcs_select_call_coproc (pcum, mode, type);
5451 pcum->aapcs_cprc_slot = slot;
5453 /* We don't have to apply any of the rules from part B of the
5454 preparation phase, these are handled elsewhere in the
5455 compiler. */
5457 if (slot >= 0)
5459 /* A Co-processor register candidate goes either in its own
5460 class of registers or on the stack. */
5461 if (!pcum->aapcs_cprc_failed[slot])
5463 /* C1.cp - Try to allocate the argument to co-processor
5464 registers. */
5465 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5466 return;
5468 /* C2.cp - Put the argument on the stack and note that we
5469 can't assign any more candidates in this slot. We also
5470 need to note that we have allocated stack space, so that
5471 we won't later try to split a non-cprc candidate between
5472 core registers and the stack. */
5473 pcum->aapcs_cprc_failed[slot] = true;
5474 pcum->can_split = false;
5477 /* We didn't get a register, so this argument goes on the
5478 stack. */
5479 gcc_assert (pcum->can_split == false);
5480 return;
5484 /* C3 - For double-word aligned arguments, round the NCRN up to the
5485 next even number. */
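/* For example, if r0 already holds the first argument and the next
   argument is a 64-bit "long long", r1 is skipped and the value is
   allocated to the r2/r3 pair. */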
5486 ncrn = pcum->aapcs_ncrn;
5487 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5488 ncrn++;
5490 nregs = ARM_NUM_REGS2(mode, type);
5492 /* Sigh, this test should really assert that nregs > 0, but a GCC
5493 extension allows empty structs and then gives them empty size; it
5494 then allows such a structure to be passed by value. For some of
5495 the code below we have to pretend that such an argument has
5496 non-zero size so that we 'locate' it correctly either in
5497 registers or on the stack. */
5498 gcc_assert (nregs >= 0);
5500 nregs2 = nregs ? nregs : 1;
5502 /* C4 - Argument fits entirely in core registers. */
5503 if (ncrn + nregs2 <= NUM_ARG_REGS)
5505 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5506 pcum->aapcs_next_ncrn = ncrn + nregs;
5507 return;
5510 /* C5 - Some core registers left and there are no arguments already
5511 on the stack: split this argument between the remaining core
5512 registers and the stack. */
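/* For example, with NCRN == 3 an 8-byte argument has its first word
   passed in r3 and the remaining 4 bytes on the stack
   (aapcs_partial == 4). */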
5513 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5515 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5516 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5517 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5518 return;
5521 /* C6 - NCRN is set to 4. */
5522 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5524 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5525 return;
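/* Editor's note (illustrative sketch, not part of the original source):
   a short worked example of the C3-C7 rules above for the base
   (soft-float) AAPCS variant, assuming the prototype

     void f (int a, double d, int b);

   a -> r0                (C4: fits entirely in core registers)
   d -> r2/r3             (C3: NCRN rounded up to the next even number)
   b -> stack             (C6: NCRN becomes 4; C7/C8: argument on stack)  */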
5528 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5529 for a call to a function whose data type is FNTYPE.
5530 For a library call, FNTYPE is NULL. */
5531 void
5532 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5533 rtx libname,
5534 tree fndecl ATTRIBUTE_UNUSED)
5536 /* Long call handling. */
5537 if (fntype)
5538 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5539 else
5540 pcum->pcs_variant = arm_pcs_default;
5542 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5544 if (arm_libcall_uses_aapcs_base (libname))
5545 pcum->pcs_variant = ARM_PCS_AAPCS;
5547 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5548 pcum->aapcs_reg = NULL_RTX;
5549 pcum->aapcs_partial = 0;
5550 pcum->aapcs_arg_processed = false;
5551 pcum->aapcs_cprc_slot = -1;
5552 pcum->can_split = true;
5554 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5556 int i;
5558 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5560 pcum->aapcs_cprc_failed[i] = false;
5561 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5564 return;
5567 /* Legacy ABIs */
5569 /* On the ARM, the offset starts at 0. */
5570 pcum->nregs = 0;
5571 pcum->iwmmxt_nregs = 0;
5572 pcum->can_split = true;
5574 /* Varargs vectors are treated the same as long long.
5575 named_count avoids having to change the way arm handles 'named'. */
5576 pcum->named_count = 0;
5577 pcum->nargs = 0;
5579 if (TARGET_REALLY_IWMMXT && fntype)
5581 tree fn_arg;
5583 for (fn_arg = TYPE_ARG_TYPES (fntype);
5584 fn_arg;
5585 fn_arg = TREE_CHAIN (fn_arg))
5586 pcum->named_count += 1;
5588 if (! pcum->named_count)
5589 pcum->named_count = INT_MAX;
5593 /* Return true if we use LRA instead of reload pass. */
5594 static bool
5595 arm_lra_p (void)
5597 return arm_lra_flag;
5600 /* Return true if mode/type need doubleword alignment. */
5601 static bool
5602 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5604 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5605 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
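/* Editor's note (illustrative, not part of the original source): on this
   target PARM_BOUNDARY is 32, so under an AAPCS configuration
   GET_MODE_ALIGNMENT (DImode) == 64 makes a "long long" argument need
   doubleword alignment, while a plain "int" (SImode, alignment 32)
   does not.  */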
5609 /* Determine where to put an argument to a function.
5610 Value is zero to push the argument on the stack,
5611 or a hard register in which to store the argument.
5613 MODE is the argument's machine mode.
5614 TYPE is the data type of the argument (as a tree).
5615 This is null for libcalls where that information may
5616 not be available.
5617 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5618 the preceding args and about the function being called.
5619 NAMED is nonzero if this argument is a named parameter
5620 (otherwise it is an extra parameter matching an ellipsis).
5622 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5623 other arguments are passed on the stack. If (NAMED == 0) (which happens
5624 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5625 defined), say it is passed on the stack (function_prologue will
5626 indeed arrange for it to be passed on the stack if necessary). */
5628 static rtx
5629 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5630 const_tree type, bool named)
5632 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5633 int nregs;
5635 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5636 a call insn (op3 of a call_value insn). */
5637 if (mode == VOIDmode)
5638 return const0_rtx;
5640 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5642 aapcs_layout_arg (pcum, mode, type, named);
5643 return pcum->aapcs_reg;
5646 /* Varargs vectors are treated the same as long long.
5647 named_count avoids having to change the way arm handles 'named'. */
5648 if (TARGET_IWMMXT_ABI
5649 && arm_vector_mode_supported_p (mode)
5650 && pcum->named_count > pcum->nargs + 1)
5652 if (pcum->iwmmxt_nregs <= 9)
5653 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5654 else
5656 pcum->can_split = false;
5657 return NULL_RTX;
5661 /* Put doubleword aligned quantities in even register pairs. */
5662 if (pcum->nregs & 1
5663 && ARM_DOUBLEWORD_ALIGN
5664 && arm_needs_doubleword_align (mode, type))
5665 pcum->nregs++;
5667 /* Only allow splitting an arg between regs and memory if all preceding
5668 args were allocated to regs. For args passed by reference we only count
5669 the reference pointer. */
5670 if (pcum->can_split)
5671 nregs = 1;
5672 else
5673 nregs = ARM_NUM_REGS2 (mode, type);
5675 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5676 return NULL_RTX;
5678 return gen_rtx_REG (mode, pcum->nregs);
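/* Editor's note (illustrative sketch, not part of the original source):
   for the non-AAPCS path above, a function taking (int a, long long ll)
   places a in r0, and when ARM_DOUBLEWORD_ALIGN is set the odd
   pcum->nregs is bumped from 1 to 2 so that ll occupies the even
   register pair r2/r3; anything that no longer fits in r0-r3 (or a
   non-splittable argument) gets NULL_RTX and goes to memory.  */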
5681 static unsigned int
5682 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5684 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5685 ? DOUBLEWORD_ALIGNMENT
5686 : PARM_BOUNDARY);
5689 static int
5690 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5691 tree type, bool named)
5693 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5694 int nregs = pcum->nregs;
5696 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5698 aapcs_layout_arg (pcum, mode, type, named);
5699 return pcum->aapcs_partial;
5702 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5703 return 0;
5705 if (NUM_ARG_REGS > nregs
5706 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5707 && pcum->can_split)
5708 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5710 return 0;
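/* Editor's note (illustrative sketch, not part of the original source):
   a worked example of the split logic above.  Suppose two ints already
   occupy r0/r1 (nregs == 2) and the next argument is a 16-byte
   structure: ARM_NUM_REGS2 gives 4, NUM_ARG_REGS (4) is less than
   2 + 4, and if splitting is still allowed then
   (4 - 2) * UNITS_PER_WORD = 8 bytes travel in r2/r3 while the
   remaining 8 bytes go on the stack.  */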
5713 /* Update the data in PCUM to advance over an argument
5714 of mode MODE and data type TYPE.
5715 (TYPE is null for libcalls where that information may not be available.) */
5717 static void
5718 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5719 const_tree type, bool named)
5721 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5723 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5725 aapcs_layout_arg (pcum, mode, type, named);
5727 if (pcum->aapcs_cprc_slot >= 0)
5729 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5730 type);
5731 pcum->aapcs_cprc_slot = -1;
5734 /* Generic stuff. */
5735 pcum->aapcs_arg_processed = false;
5736 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5737 pcum->aapcs_reg = NULL_RTX;
5738 pcum->aapcs_partial = 0;
5740 else
5742 pcum->nargs += 1;
5743 if (arm_vector_mode_supported_p (mode)
5744 && pcum->named_count > pcum->nargs
5745 && TARGET_IWMMXT_ABI)
5746 pcum->iwmmxt_nregs += 1;
5747 else
5748 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5752 /* Variable sized types are passed by reference. This is a GCC
5753 extension to the ARM ABI. */
5755 static bool
5756 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5757 enum machine_mode mode ATTRIBUTE_UNUSED,
5758 const_tree type, bool named ATTRIBUTE_UNUSED)
5760 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5763 /* Encode the current state of the #pragma [no_]long_calls. */
5764 typedef enum
5766 OFF, /* No #pragma [no_]long_calls is in effect. */
5767 LONG, /* #pragma long_calls is in effect. */
5768 SHORT /* #pragma no_long_calls is in effect. */
5769 } arm_pragma_enum;
5771 static arm_pragma_enum arm_pragma_long_calls = OFF;
5773 void
5774 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5776 arm_pragma_long_calls = LONG;
5779 void
5780 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5782 arm_pragma_long_calls = SHORT;
5785 void
5786 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5788 arm_pragma_long_calls = OFF;
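/* Editor's note (illustrative, not part of the original source): typical
   use of the pragmas handled above:

     #pragma long_calls
     extern void far_away (void);      (gets the "long_call" attribute)
     #pragma no_long_calls
     extern void near_by (void);       (gets the "short_call" attribute)
     #pragma long_calls_off
     extern void normal (void);        (back to the command-line default)

   arm_set_default_type_attributes below attaches the attribute to the
   function type while the corresponding pragma is in effect.  */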
5791 /* Handle an attribute requiring a FUNCTION_DECL;
5792 arguments as in struct attribute_spec.handler. */
5793 static tree
5794 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5795 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5797 if (TREE_CODE (*node) != FUNCTION_DECL)
5799 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5800 name);
5801 *no_add_attrs = true;
5804 return NULL_TREE;
5807 /* Handle an "interrupt" or "isr" attribute;
5808 arguments as in struct attribute_spec.handler. */
5809 static tree
5810 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5811 bool *no_add_attrs)
5813 if (DECL_P (*node))
5815 if (TREE_CODE (*node) != FUNCTION_DECL)
5817 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5818 name);
5819 *no_add_attrs = true;
5821 /* FIXME: the argument, if any, is checked for type attributes;
5822 should it be checked for decl ones? */
5824 else
5826 if (TREE_CODE (*node) == FUNCTION_TYPE
5827 || TREE_CODE (*node) == METHOD_TYPE)
5829 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5831 warning (OPT_Wattributes, "%qE attribute ignored",
5832 name);
5833 *no_add_attrs = true;
5836 else if (TREE_CODE (*node) == POINTER_TYPE
5837 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5838 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5839 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5841 *node = build_variant_type_copy (*node);
5842 TREE_TYPE (*node) = build_type_attribute_variant
5843 (TREE_TYPE (*node),
5844 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5845 *no_add_attrs = true;
5847 else
5849 /* Possibly pass this attribute on from the type to a decl. */
5850 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5851 | (int) ATTR_FLAG_FUNCTION_NEXT
5852 | (int) ATTR_FLAG_ARRAY_NEXT))
5854 *no_add_attrs = true;
5855 return tree_cons (name, args, NULL_TREE);
5857 else
5859 warning (OPT_Wattributes, "%qE attribute ignored",
5860 name);
5865 return NULL_TREE;
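/* Editor's note (illustrative, not part of the original source): the
   attribute handled above is normally written as, for example,

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   where the optional argument ("IRQ", "FIQ", "SWI", "ABORT" or "UNDEF")
   selects the exception type; an unrecognised argument makes
   arm_isr_value return ARM_FT_UNKNOWN, so the attribute is ignored
   with a warning, as coded above.  */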
5868 /* Handle a "pcs" attribute; arguments as in struct
5869 attribute_spec.handler. */
5870 static tree
5871 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5872 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5874 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5876 warning (OPT_Wattributes, "%qE attribute ignored", name);
5877 *no_add_attrs = true;
5879 return NULL_TREE;
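/* Editor's note (illustrative, not part of the original source): the
   "pcs" attribute handled above selects the calling convention for a
   single function, for example

     double dot (double, double) __attribute__ ((pcs ("aapcs")));

   Valid arguments are "aapcs" and "aapcs-vfp"; anything else makes
   arm_pcs_from_attribute return ARM_PCS_UNKNOWN and the attribute is
   ignored with a warning.  */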
5882 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5883 /* Handle the "notshared" attribute. This attribute is another way of
5884 requesting hidden visibility. ARM's compiler supports
5885 "__declspec(notshared)"; we support the same thing via an
5886 attribute. */
5888 static tree
5889 arm_handle_notshared_attribute (tree *node,
5890 tree name ATTRIBUTE_UNUSED,
5891 tree args ATTRIBUTE_UNUSED,
5892 int flags ATTRIBUTE_UNUSED,
5893 bool *no_add_attrs)
5895 tree decl = TYPE_NAME (*node);
5897 if (decl)
5899 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5900 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5901 *no_add_attrs = false;
5903 return NULL_TREE;
5905 #endif
5907 /* Return 0 if the attributes for two types are incompatible, 1 if they
5908 are compatible, and 2 if they are nearly compatible (which causes a
5909 warning to be generated). */
5910 static int
5911 arm_comp_type_attributes (const_tree type1, const_tree type2)
5913 int l1, l2, s1, s2;
5915 /* Check for mismatch of non-default calling convention. */
5916 if (TREE_CODE (type1) != FUNCTION_TYPE)
5917 return 1;
5919 /* Check for mismatched call attributes. */
5920 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5921 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5922 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5923 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5925 /* Only bother to check if an attribute is defined. */
5926 if (l1 | l2 | s1 | s2)
5928 /* If one type has an attribute, the other must have the same attribute. */
5929 if ((l1 != l2) || (s1 != s2))
5930 return 0;
5932 /* Disallow mixed attributes. */
5933 if ((l1 & s2) || (l2 & s1))
5934 return 0;
5937 /* Check for mismatched ISR attribute. */
5938 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5939 if (! l1)
5940 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5941 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5942 if (! l2)
5943 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5944 if (l1 != l2)
5945 return 0;
5947 return 1;
5950 /* Assigns default attributes to newly defined type. This is used to
5951 set short_call/long_call attributes for function types of
5952 functions defined inside corresponding #pragma scopes. */
5953 static void
5954 arm_set_default_type_attributes (tree type)
5956 /* Add __attribute__ ((long_call)) to all functions, when
5957 inside #pragma long_calls or __attribute__ ((short_call)),
5958 when inside #pragma no_long_calls. */
5959 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5961 tree type_attr_list, attr_name;
5962 type_attr_list = TYPE_ATTRIBUTES (type);
5964 if (arm_pragma_long_calls == LONG)
5965 attr_name = get_identifier ("long_call");
5966 else if (arm_pragma_long_calls == SHORT)
5967 attr_name = get_identifier ("short_call");
5968 else
5969 return;
5971 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5972 TYPE_ATTRIBUTES (type) = type_attr_list;
5976 /* Return true if DECL is known to be linked into section SECTION. */
5978 static bool
5979 arm_function_in_section_p (tree decl, section *section)
5981 /* We can only be certain about functions defined in the same
5982 compilation unit. */
5983 if (!TREE_STATIC (decl))
5984 return false;
5986 /* Make sure that SYMBOL always binds to the definition in this
5987 compilation unit. */
5988 if (!targetm.binds_local_p (decl))
5989 return false;
5991 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5992 if (!DECL_SECTION_NAME (decl))
5994 /* Make sure that we will not create a unique section for DECL. */
5995 if (flag_function_sections || DECL_ONE_ONLY (decl))
5996 return false;
5999 return function_section (decl) == section;
6002 /* Return nonzero if a 32-bit "long_call" should be generated for
6003 a call from the current function to DECL. We generate a long_call
6004 if the function:
6006 a. has an __attribute__ ((long_call))
6007 or b. is within the scope of a #pragma long_calls
6008 or c. the -mlong-calls command line switch has been specified
6010 However we do not generate a long call if the function:
6012 d. has an __attribute__ ((short_call))
6013 or e. is inside the scope of a #pragma no_long_calls
6014 or f. is defined in the same section as the current function. */
6016 bool
6017 arm_is_long_call_p (tree decl)
6019 tree attrs;
6021 if (!decl)
6022 return TARGET_LONG_CALLS;
6024 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6025 if (lookup_attribute ("short_call", attrs))
6026 return false;
6028 /* For "f", be conservative, and only cater for cases in which the
6029 whole of the current function is placed in the same section. */
6030 if (!flag_reorder_blocks_and_partition
6031 && TREE_CODE (decl) == FUNCTION_DECL
6032 && arm_function_in_section_p (decl, current_function_section ()))
6033 return false;
6035 if (lookup_attribute ("long_call", attrs))
6036 return true;
6038 return TARGET_LONG_CALLS;
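/* Editor's note (illustrative, not part of the original source): under
   the rules above, with -mlong-calls in effect a declaration such as

     extern void log_msg (const char *) __attribute__ ((short_call));

   still gets a direct BL, while without -mlong-calls

     extern void flash_write (void *) __attribute__ ((long_call));

   is called indirectly through a register.  Calls to functions known to
   be placed in the same section as the caller (case "f") always use a
   direct branch.  */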
6041 /* Return nonzero if it is ok to make a tail-call to DECL. */
6042 static bool
6043 arm_function_ok_for_sibcall (tree decl, tree exp)
6045 unsigned long func_type;
6047 if (cfun->machine->sibcall_blocked)
6048 return false;
6050 /* Never tailcall something if we are generating code for Thumb-1. */
6051 if (TARGET_THUMB1)
6052 return false;
6054 /* The PIC register is live on entry to VxWorks PLT entries, so we
6055 must make the call before restoring the PIC register. */
6056 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6057 return false;
6059 /* Cannot tail-call to long calls, since these are out of range of
6060 a branch instruction. */
6061 if (decl && arm_is_long_call_p (decl))
6062 return false;
6064 /* If we are interworking and the function is not declared static
6065 then we can't tail-call it unless we know that it exists in this
6066 compilation unit (since it might be a Thumb routine). */
6067 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6068 && !TREE_ASM_WRITTEN (decl))
6069 return false;
6071 func_type = arm_current_func_type ();
6072 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6073 if (IS_INTERRUPT (func_type))
6074 return false;
6076 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6078 /* Check that the return value locations are the same. For
6079 example that we aren't returning a value from the sibling in
6080 a VFP register but then need to transfer it to a core
6081 register. */
6082 rtx a, b;
6084 a = arm_function_value (TREE_TYPE (exp), decl, false);
6085 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6086 cfun->decl, false);
6087 if (!rtx_equal_p (a, b))
6088 return false;
6091 /* Never tailcall if function may be called with a misaligned SP. */
6092 if (IS_STACKALIGN (func_type))
6093 return false;
6095 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6096 references should become a NOP. Don't convert such calls into
6097 sibling calls. */
6098 if (TARGET_AAPCS_BASED
6099 && arm_abi == ARM_ABI_AAPCS
6100 && decl
6101 && DECL_WEAK (decl))
6102 return false;
6104 /* Everything else is ok. */
6105 return true;
6109 /* Addressing mode support functions. */
6111 /* Return nonzero if X is a legitimate immediate operand when compiling
6112 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6114 legitimate_pic_operand_p (rtx x)
6116 if (GET_CODE (x) == SYMBOL_REF
6117 || (GET_CODE (x) == CONST
6118 && GET_CODE (XEXP (x, 0)) == PLUS
6119 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6120 return 0;
6122 return 1;
6125 /* Record that the current function needs a PIC register. Initialize
6126 cfun->machine->pic_reg if we have not already done so. */
6128 static void
6129 require_pic_register (void)
6131 /* A lot of the logic here is made obscure by the fact that this
6132 routine gets called as part of the rtx cost estimation process.
6133 We don't want those calls to affect any assumptions about the real
6134 function; and further, we can't call entry_of_function() until we
6135 start the real expansion process. */
6136 if (!crtl->uses_pic_offset_table)
6138 gcc_assert (can_create_pseudo_p ());
6139 if (arm_pic_register != INVALID_REGNUM
6140 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6142 if (!cfun->machine->pic_reg)
6143 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6145 /* Play games to avoid marking the function as needing pic
6146 if we are being called as part of the cost-estimation
6147 process. */
6148 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6149 crtl->uses_pic_offset_table = 1;
6151 else
6153 rtx seq, insn;
6155 if (!cfun->machine->pic_reg)
6156 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6158 /* Play games to avoid marking the function as needing pic
6159 if we are being called as part of the cost-estimation
6160 process. */
6161 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6163 crtl->uses_pic_offset_table = 1;
6164 start_sequence ();
6166 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6167 && arm_pic_register > LAST_LO_REGNUM)
6168 emit_move_insn (cfun->machine->pic_reg,
6169 gen_rtx_REG (Pmode, arm_pic_register));
6170 else
6171 arm_load_pic_register (0UL);
6173 seq = get_insns ();
6174 end_sequence ();
6176 for (insn = seq; insn; insn = NEXT_INSN (insn))
6177 if (INSN_P (insn))
6178 INSN_LOCATION (insn) = prologue_location;
6180 /* We can be called during expansion of PHI nodes, where
6181 we can't yet emit instructions directly in the final
6182 insn stream. Queue the insns on the entry edge, they will
6183 be committed after everything else is expanded. */
6184 insert_insn_on_edge (seq,
6185 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6192 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6194 if (GET_CODE (orig) == SYMBOL_REF
6195 || GET_CODE (orig) == LABEL_REF)
6197 rtx insn;
6199 if (reg == 0)
6201 gcc_assert (can_create_pseudo_p ());
6202 reg = gen_reg_rtx (Pmode);
6205 /* VxWorks does not impose a fixed gap between segments; the run-time
6206 gap can be different from the object-file gap. We therefore can't
6207 use GOTOFF unless we are absolutely sure that the symbol is in the
6208 same segment as the GOT. Unfortunately, the flexibility of linker
6209 scripts means that we can't be sure of that in general, so assume
6210 that GOTOFF is never valid on VxWorks. */
6211 if ((GET_CODE (orig) == LABEL_REF
6212 || (GET_CODE (orig) == SYMBOL_REF &&
6213 SYMBOL_REF_LOCAL_P (orig)))
6214 && NEED_GOT_RELOC
6215 && arm_pic_data_is_text_relative)
6216 insn = arm_pic_static_addr (orig, reg);
6217 else
6219 rtx pat;
6220 rtx mem;
6222 /* If this function doesn't have a pic register, create one now. */
6223 require_pic_register ();
6225 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6227 /* Make the MEM as close to a constant as possible. */
6228 mem = SET_SRC (pat);
6229 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6230 MEM_READONLY_P (mem) = 1;
6231 MEM_NOTRAP_P (mem) = 1;
6233 insn = emit_insn (pat);
6236 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6237 by loop. */
6238 set_unique_reg_note (insn, REG_EQUAL, orig);
6240 return reg;
6242 else if (GET_CODE (orig) == CONST)
6244 rtx base, offset;
6246 if (GET_CODE (XEXP (orig, 0)) == PLUS
6247 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6248 return orig;
6250 /* Handle the case where we have: const (UNSPEC_TLS). */
6251 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6252 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6253 return orig;
6255 /* Handle the case where we have:
6256 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6257 CONST_INT. */
6258 if (GET_CODE (XEXP (orig, 0)) == PLUS
6259 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6260 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6262 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6263 return orig;
6266 if (reg == 0)
6268 gcc_assert (can_create_pseudo_p ());
6269 reg = gen_reg_rtx (Pmode);
6272 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6274 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6275 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6276 base == reg ? 0 : reg);
6278 if (CONST_INT_P (offset))
6280 /* The base register doesn't really matter, we only want to
6281 test the index for the appropriate mode. */
6282 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6284 gcc_assert (can_create_pseudo_p ());
6285 offset = force_reg (Pmode, offset);
6288 if (CONST_INT_P (offset))
6289 return plus_constant (Pmode, base, INTVAL (offset));
6292 if (GET_MODE_SIZE (mode) > 4
6293 && (GET_MODE_CLASS (mode) == MODE_INT
6294 || TARGET_SOFT_FLOAT))
6296 emit_insn (gen_addsi3 (reg, base, offset));
6297 return reg;
6300 return gen_rtx_PLUS (Pmode, base, offset);
6303 return orig;
6307 /* Find a spare register to use during the prolog of a function. */
6309 static int
6310 thumb_find_work_register (unsigned long pushed_regs_mask)
6312 int reg;
6314 /* Check the argument registers first as these are call-used. The
6315 register allocation order means that sometimes r3 might be used
6316 but earlier argument registers might not, so check them all. */
6317 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6318 if (!df_regs_ever_live_p (reg))
6319 return reg;
6321 /* Before going on to check the call-saved registers we can try a couple
6322 more ways of deducing that r3 is available. The first is when we are
6323 pushing anonymous arguments onto the stack and we have less than 4
6324 registers worth of fixed arguments(*). In this case r3 will be part of
6325 the variable argument list and so we can be sure that it will be
6326 pushed right at the start of the function. Hence it will be available
6327 for the rest of the prologue.
6328 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6329 if (cfun->machine->uses_anonymous_args
6330 && crtl->args.pretend_args_size > 0)
6331 return LAST_ARG_REGNUM;
6333 /* The other case is when we have fixed arguments but less than 4 registers
6334 worth. In this case r3 might be used in the body of the function, but
6335 it is not being used to convey an argument into the function. In theory
6336 we could just check crtl->args.size to see how many bytes are
6337 being passed in argument registers, but it seems that it is unreliable.
6338 Sometimes it will have the value 0 when in fact arguments are being
6339 passed. (See testcase execute/20021111-1.c for an example). So we also
6340 check the args_info.nregs field as well. The problem with this field is
6341 that it makes no allowances for arguments that are passed to the
6342 function but which are not used. Hence we could miss an opportunity
6343 when a function has an unused argument in r3. But it is better to be
6344 safe than to be sorry. */
6345 if (! cfun->machine->uses_anonymous_args
6346 && crtl->args.size >= 0
6347 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6348 && (TARGET_AAPCS_BASED
6349 ? crtl->args.info.aapcs_ncrn < 4
6350 : crtl->args.info.nregs < 4))
6351 return LAST_ARG_REGNUM;
6353 /* Otherwise look for a call-saved register that is going to be pushed. */
6354 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6355 if (pushed_regs_mask & (1 << reg))
6356 return reg;
6358 if (TARGET_THUMB2)
6360 /* Thumb-2 can use high regs. */
6361 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6362 if (pushed_regs_mask & (1 << reg))
6363 return reg;
6365 /* Something went wrong - thumb_compute_save_reg_mask()
6366 should have arranged for a suitable register to be pushed. */
6367 gcc_unreachable ();
6370 static GTY(()) int pic_labelno;
6372 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6373 low register. */
6375 void
6376 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6378 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6380 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6381 return;
6383 gcc_assert (flag_pic);
6385 pic_reg = cfun->machine->pic_reg;
6386 if (TARGET_VXWORKS_RTP)
6388 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6389 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6390 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6392 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6394 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6395 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6397 else
6399 /* We use an UNSPEC rather than a LABEL_REF because this label
6400 never appears in the code stream. */
6402 labelno = GEN_INT (pic_labelno++);
6403 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6404 l1 = gen_rtx_CONST (VOIDmode, l1);
6406 /* On the ARM the PC register contains 'dot + 8' at the time of the
6407 addition, on the Thumb it is 'dot + 4'. */
6408 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6409 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6410 UNSPEC_GOTSYM_OFF);
6411 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6413 if (TARGET_32BIT)
6415 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6417 else /* TARGET_THUMB1 */
6419 if (arm_pic_register != INVALID_REGNUM
6420 && REGNO (pic_reg) > LAST_LO_REGNUM)
6422 /* We will have pushed the pic register, so we should always be
6423 able to find a work register. */
6424 pic_tmp = gen_rtx_REG (SImode,
6425 thumb_find_work_register (saved_regs));
6426 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6427 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6428 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6430 else if (arm_pic_register != INVALID_REGNUM
6431 && arm_pic_register > LAST_LO_REGNUM
6432 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6434 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6435 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6436 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6438 else
6439 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6443 /* Need to emit this whether or not we obey regdecls,
6444 since setjmp/longjmp can cause life info to screw up. */
6445 emit_use (pic_reg);
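/* Editor's note (illustrative sketch, not part of the original source):
   a rough picture of the ARM-state sequence emitted above for the usual
   (non-VxWorks) case, assuming the PIC register is r4:

        ldr     r4, .LCPIC        @ _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)
     .LPIC0:
        add     r4, pc, r4        @ pc reads as .LPIC0 + 8 in ARM state

   which is why the code adds 8 (or 4 for Thumb) to the label address.  */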
6448 /* Generate code to load the address of a static var when flag_pic is set. */
6449 static rtx
6450 arm_pic_static_addr (rtx orig, rtx reg)
6452 rtx l1, labelno, offset_rtx, insn;
6454 gcc_assert (flag_pic);
6456 /* We use an UNSPEC rather than a LABEL_REF because this label
6457 never appears in the code stream. */
6458 labelno = GEN_INT (pic_labelno++);
6459 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6460 l1 = gen_rtx_CONST (VOIDmode, l1);
6462 /* On the ARM the PC register contains 'dot + 8' at the time of the
6463 addition, on the Thumb it is 'dot + 4'. */
6464 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6465 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6466 UNSPEC_SYMBOL_OFFSET);
6467 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6469 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6470 return insn;
6473 /* Return nonzero if X is valid as an ARM state addressing register. */
6474 static int
6475 arm_address_register_rtx_p (rtx x, int strict_p)
6477 int regno;
6479 if (!REG_P (x))
6480 return 0;
6482 regno = REGNO (x);
6484 if (strict_p)
6485 return ARM_REGNO_OK_FOR_BASE_P (regno);
6487 return (regno <= LAST_ARM_REGNUM
6488 || regno >= FIRST_PSEUDO_REGISTER
6489 || regno == FRAME_POINTER_REGNUM
6490 || regno == ARG_POINTER_REGNUM);
6493 /* Return TRUE if this rtx is the difference of a symbol and a label,
6494 and will reduce to a PC-relative relocation in the object file.
6495 Expressions like this can be left alone when generating PIC, rather
6496 than forced through the GOT. */
6497 static int
6498 pcrel_constant_p (rtx x)
6500 if (GET_CODE (x) == MINUS)
6501 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6503 return FALSE;
6506 /* Return true if X will surely end up in an index register after next
6507 splitting pass. */
6508 static bool
6509 will_be_in_index_register (const_rtx x)
6511 /* arm.md: calculate_pic_address will split this into a register. */
6512 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6515 /* Return nonzero if X is a valid ARM state address operand. */
6517 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6518 int strict_p)
6520 bool use_ldrd;
6521 enum rtx_code code = GET_CODE (x);
6523 if (arm_address_register_rtx_p (x, strict_p))
6524 return 1;
6526 use_ldrd = (TARGET_LDRD
6527 && (mode == DImode
6528 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6530 if (code == POST_INC || code == PRE_DEC
6531 || ((code == PRE_INC || code == POST_DEC)
6532 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6533 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6535 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6536 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6537 && GET_CODE (XEXP (x, 1)) == PLUS
6538 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6540 rtx addend = XEXP (XEXP (x, 1), 1);
6542 /* Don't allow ldrd post increment by register because it's hard
6543 to fixup invalid register choices. */
6544 if (use_ldrd
6545 && GET_CODE (x) == POST_MODIFY
6546 && REG_P (addend))
6547 return 0;
6549 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6550 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6553 /* After reload constants split into minipools will have addresses
6554 from a LABEL_REF. */
6555 else if (reload_completed
6556 && (code == LABEL_REF
6557 || (code == CONST
6558 && GET_CODE (XEXP (x, 0)) == PLUS
6559 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6560 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6561 return 1;
6563 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6564 return 0;
6566 else if (code == PLUS)
6568 rtx xop0 = XEXP (x, 0);
6569 rtx xop1 = XEXP (x, 1);
6571 return ((arm_address_register_rtx_p (xop0, strict_p)
6572 && ((CONST_INT_P (xop1)
6573 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6574 || (!strict_p && will_be_in_index_register (xop1))))
6575 || (arm_address_register_rtx_p (xop1, strict_p)
6576 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6579 #if 0
6580 /* Reload currently can't handle MINUS, so disable this for now */
6581 else if (GET_CODE (x) == MINUS)
6583 rtx xop0 = XEXP (x, 0);
6584 rtx xop1 = XEXP (x, 1);
6586 return (arm_address_register_rtx_p (xop0, strict_p)
6587 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6589 #endif
6591 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6592 && code == SYMBOL_REF
6593 && CONSTANT_POOL_ADDRESS_P (x)
6594 && ! (flag_pic
6595 && symbol_mentioned_p (get_pool_constant (x))
6596 && ! pcrel_constant_p (get_pool_constant (x))))
6597 return 1;
6599 return 0;
6602 /* Return nonzero if X is a valid Thumb-2 address operand. */
6603 static int
6604 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6606 bool use_ldrd;
6607 enum rtx_code code = GET_CODE (x);
6609 if (arm_address_register_rtx_p (x, strict_p))
6610 return 1;
6612 use_ldrd = (TARGET_LDRD
6613 && (mode == DImode
6614 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6616 if (code == POST_INC || code == PRE_DEC
6617 || ((code == PRE_INC || code == POST_DEC)
6618 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6619 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6621 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6622 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6623 && GET_CODE (XEXP (x, 1)) == PLUS
6624 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6626 /* Thumb-2 only has autoincrement by constant. */
6627 rtx addend = XEXP (XEXP (x, 1), 1);
6628 HOST_WIDE_INT offset;
6630 if (!CONST_INT_P (addend))
6631 return 0;
6633 offset = INTVAL(addend);
6634 if (GET_MODE_SIZE (mode) <= 4)
6635 return (offset > -256 && offset < 256);
6637 return (use_ldrd && offset > -1024 && offset < 1024
6638 && (offset & 3) == 0);
6641 /* After reload constants split into minipools will have addresses
6642 from a LABEL_REF. */
6643 else if (reload_completed
6644 && (code == LABEL_REF
6645 || (code == CONST
6646 && GET_CODE (XEXP (x, 0)) == PLUS
6647 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6648 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6649 return 1;
6651 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6652 return 0;
6654 else if (code == PLUS)
6656 rtx xop0 = XEXP (x, 0);
6657 rtx xop1 = XEXP (x, 1);
6659 return ((arm_address_register_rtx_p (xop0, strict_p)
6660 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6661 || (!strict_p && will_be_in_index_register (xop1))))
6662 || (arm_address_register_rtx_p (xop1, strict_p)
6663 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6666 /* Normally we can assign constant values to target registers without
6667 the help of a constant pool. But there are cases where we have to use
6668 the constant pool, for example:
6669 1) assigning a label to a register.
6670 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6672 A constant pool access of the form:
6673 (set (reg r0) (mem (symbol_ref (".LC0"))))
6674 will cause a literal pool to be used (later, in function arm_reorg).
6675 So here we reject that form as an invalid address, and the compiler
6676 will adjust it into:
6677 (set (reg r0) (symbol_ref (".LC0")))
6678 (set (reg r0) (mem (reg r0))).
6679 No extra register is required, and (mem (reg r0)) won't cause the use
6680 of literal pools. */
6681 else if (arm_disable_literal_pool && code == SYMBOL_REF
6682 && CONSTANT_POOL_ADDRESS_P (x))
6683 return 0;
6685 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6686 && code == SYMBOL_REF
6687 && CONSTANT_POOL_ADDRESS_P (x)
6688 && ! (flag_pic
6689 && symbol_mentioned_p (get_pool_constant (x))
6690 && ! pcrel_constant_p (get_pool_constant (x))))
6691 return 1;
6693 return 0;
6696 /* Return nonzero if INDEX is valid for an address index operand in
6697 ARM state. */
6698 static int
6699 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6700 int strict_p)
6702 HOST_WIDE_INT range;
6703 enum rtx_code code = GET_CODE (index);
6705 /* Standard coprocessor addressing modes. */
6706 if (TARGET_HARD_FLOAT
6707 && TARGET_VFP
6708 && (mode == SFmode || mode == DFmode))
6709 return (code == CONST_INT && INTVAL (index) < 1024
6710 && INTVAL (index) > -1024
6711 && (INTVAL (index) & 3) == 0);
6713 /* For quad modes, we restrict the constant offset to be slightly less
6714 than what the instruction format permits. We do this because for
6715 quad mode moves, we will actually decompose them into two separate
6716 double-mode reads or writes. INDEX must therefore be a valid
6717 (double-mode) offset and so should INDEX+8. */
6718 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6719 return (code == CONST_INT
6720 && INTVAL (index) < 1016
6721 && INTVAL (index) > -1024
6722 && (INTVAL (index) & 3) == 0);
6724 /* We have no such constraint on double mode offsets, so we permit the
6725 full range of the instruction format. */
6726 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6727 return (code == CONST_INT
6728 && INTVAL (index) < 1024
6729 && INTVAL (index) > -1024
6730 && (INTVAL (index) & 3) == 0);
6732 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6733 return (code == CONST_INT
6734 && INTVAL (index) < 1024
6735 && INTVAL (index) > -1024
6736 && (INTVAL (index) & 3) == 0);
6738 if (arm_address_register_rtx_p (index, strict_p)
6739 && (GET_MODE_SIZE (mode) <= 4))
6740 return 1;
6742 if (mode == DImode || mode == DFmode)
6744 if (code == CONST_INT)
6746 HOST_WIDE_INT val = INTVAL (index);
6748 if (TARGET_LDRD)
6749 return val > -256 && val < 256;
6750 else
6751 return val > -4096 && val < 4092;
6754 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6757 if (GET_MODE_SIZE (mode) <= 4
6758 && ! (arm_arch4
6759 && (mode == HImode
6760 || mode == HFmode
6761 || (mode == QImode && outer == SIGN_EXTEND))))
6763 if (code == MULT)
6765 rtx xiop0 = XEXP (index, 0);
6766 rtx xiop1 = XEXP (index, 1);
6768 return ((arm_address_register_rtx_p (xiop0, strict_p)
6769 && power_of_two_operand (xiop1, SImode))
6770 || (arm_address_register_rtx_p (xiop1, strict_p)
6771 && power_of_two_operand (xiop0, SImode)));
6773 else if (code == LSHIFTRT || code == ASHIFTRT
6774 || code == ASHIFT || code == ROTATERT)
6776 rtx op = XEXP (index, 1);
6778 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6779 && CONST_INT_P (op)
6780 && INTVAL (op) > 0
6781 && INTVAL (op) <= 31);
6785 /* For ARM v4 we may be doing a sign-extend operation during the
6786 load. */
6787 if (arm_arch4)
6789 if (mode == HImode
6790 || mode == HFmode
6791 || (outer == SIGN_EXTEND && mode == QImode))
6792 range = 256;
6793 else
6794 range = 4096;
6796 else
6797 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6799 return (code == CONST_INT
6800 && INTVAL (index) < range
6801 && INTVAL (index) > -range);
6804 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6805 index operand, i.e. 1, 2, 4 or 8. */
6806 static bool
6807 thumb2_index_mul_operand (rtx op)
6809 HOST_WIDE_INT val;
6811 if (!CONST_INT_P (op))
6812 return false;
6814 val = INTVAL(op);
6815 return (val == 1 || val == 2 || val == 4 || val == 8);
6818 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6819 static int
6820 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6822 enum rtx_code code = GET_CODE (index);
6824 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6825 /* Standard coprocessor addressing modes. */
6826 if (TARGET_HARD_FLOAT
6827 && TARGET_VFP
6828 && (mode == SFmode || mode == DFmode))
6829 return (code == CONST_INT && INTVAL (index) < 1024
6830 /* Thumb-2 allows only a > -256 index range for its core register
6831 load/stores. Since we allow SF/DF in core registers, we have
6832 to use the intersection between -256~4096 (core) and -1024~1024
6833 (coprocessor). */
6834 && INTVAL (index) > -256
6835 && (INTVAL (index) & 3) == 0);
6837 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6839 /* For DImode assume values will usually live in core regs
6840 and only allow LDRD addressing modes. */
6841 if (!TARGET_LDRD || mode != DImode)
6842 return (code == CONST_INT
6843 && INTVAL (index) < 1024
6844 && INTVAL (index) > -1024
6845 && (INTVAL (index) & 3) == 0);
6848 /* For quad modes, we restrict the constant offset to be slightly less
6849 than what the instruction format permits. We do this because for
6850 quad mode moves, we will actually decompose them into two separate
6851 double-mode reads or writes. INDEX must therefore be a valid
6852 (double-mode) offset and so should INDEX+8. */
6853 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6854 return (code == CONST_INT
6855 && INTVAL (index) < 1016
6856 && INTVAL (index) > -1024
6857 && (INTVAL (index) & 3) == 0);
6859 /* We have no such constraint on double mode offsets, so we permit the
6860 full range of the instruction format. */
6861 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6862 return (code == CONST_INT
6863 && INTVAL (index) < 1024
6864 && INTVAL (index) > -1024
6865 && (INTVAL (index) & 3) == 0);
6867 if (arm_address_register_rtx_p (index, strict_p)
6868 && (GET_MODE_SIZE (mode) <= 4))
6869 return 1;
6871 if (mode == DImode || mode == DFmode)
6873 if (code == CONST_INT)
6875 HOST_WIDE_INT val = INTVAL (index);
6876 /* ??? Can we assume ldrd for thumb2? */
6877 /* Thumb-2 ldrd only has reg+const addressing modes. */
6878 /* ldrd supports offsets of +-1020.
6879 However the ldr fallback does not. */
6880 return val > -256 && val < 256 && (val & 3) == 0;
6882 else
6883 return 0;
6886 if (code == MULT)
6888 rtx xiop0 = XEXP (index, 0);
6889 rtx xiop1 = XEXP (index, 1);
6891 return ((arm_address_register_rtx_p (xiop0, strict_p)
6892 && thumb2_index_mul_operand (xiop1))
6893 || (arm_address_register_rtx_p (xiop1, strict_p)
6894 && thumb2_index_mul_operand (xiop0)));
6896 else if (code == ASHIFT)
6898 rtx op = XEXP (index, 1);
6900 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6901 && CONST_INT_P (op)
6902 && INTVAL (op) > 0
6903 && INTVAL (op) <= 3);
6906 return (code == CONST_INT
6907 && INTVAL (index) < 4096
6908 && INTVAL (index) > -256);
6911 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6912 static int
6913 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6915 int regno;
6917 if (!REG_P (x))
6918 return 0;
6920 regno = REGNO (x);
6922 if (strict_p)
6923 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6925 return (regno <= LAST_LO_REGNUM
6926 || regno > LAST_VIRTUAL_REGISTER
6927 || regno == FRAME_POINTER_REGNUM
6928 || (GET_MODE_SIZE (mode) >= 4
6929 && (regno == STACK_POINTER_REGNUM
6930 || regno >= FIRST_PSEUDO_REGISTER
6931 || x == hard_frame_pointer_rtx
6932 || x == arg_pointer_rtx)));
6935 /* Return nonzero if x is a legitimate index register. This is the case
6936 for any base register that can access a QImode object. */
6937 inline static int
6938 thumb1_index_register_rtx_p (rtx x, int strict_p)
6940 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6943 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6945 The AP may be eliminated to either the SP or the FP, so we use the
6946 least common denominator, e.g. SImode, and offsets from 0 to 64.
6948 ??? Verify whether the above is the right approach.
6950 ??? Also, the FP may be eliminated to the SP, so perhaps that
6951 needs special handling also.
6953 ??? Look at how the mips16 port solves this problem. It probably uses
6954 better ways to solve some of these problems.
6956 Although it is not incorrect, we don't accept QImode and HImode
6957 addresses based on the frame pointer or arg pointer until the
6958 reload pass starts. This is so that eliminating such addresses
6959 into stack based ones won't produce impossible code. */
6961 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6963 /* ??? Not clear if this is right. Experiment. */
6964 if (GET_MODE_SIZE (mode) < 4
6965 && !(reload_in_progress || reload_completed)
6966 && (reg_mentioned_p (frame_pointer_rtx, x)
6967 || reg_mentioned_p (arg_pointer_rtx, x)
6968 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6969 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6970 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6971 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6972 return 0;
6974 /* Accept any base register. SP only in SImode or larger. */
6975 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6976 return 1;
6978 /* This is PC relative data before arm_reorg runs. */
6979 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6980 && GET_CODE (x) == SYMBOL_REF
6981 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6982 return 1;
6984 /* This is PC relative data after arm_reorg runs. */
6985 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6986 && reload_completed
6987 && (GET_CODE (x) == LABEL_REF
6988 || (GET_CODE (x) == CONST
6989 && GET_CODE (XEXP (x, 0)) == PLUS
6990 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6991 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6992 return 1;
6994 /* Post-inc indexing only supported for SImode and larger. */
6995 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6996 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6997 return 1;
6999 else if (GET_CODE (x) == PLUS)
7001 /* REG+REG address can be any two index registers. */
7002 /* We disallow FRAME+REG addressing since we know that FRAME
7003 will be replaced with STACK, and SP relative addressing only
7004 permits SP+OFFSET. */
7005 if (GET_MODE_SIZE (mode) <= 4
7006 && XEXP (x, 0) != frame_pointer_rtx
7007 && XEXP (x, 1) != frame_pointer_rtx
7008 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7009 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7010 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7011 return 1;
7013 /* REG+const has 5-7 bit offset for non-SP registers. */
7014 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7015 || XEXP (x, 0) == arg_pointer_rtx)
7016 && CONST_INT_P (XEXP (x, 1))
7017 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7018 return 1;
7020 /* REG+const has 10-bit offset for SP, but only SImode and
7021 larger are supported. */
7022 /* ??? Should probably check for DI/DFmode overflow here
7023 just like GO_IF_LEGITIMATE_OFFSET does. */
7024 else if (REG_P (XEXP (x, 0))
7025 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7026 && GET_MODE_SIZE (mode) >= 4
7027 && CONST_INT_P (XEXP (x, 1))
7028 && INTVAL (XEXP (x, 1)) >= 0
7029 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7030 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7031 return 1;
7033 else if (REG_P (XEXP (x, 0))
7034 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7035 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7036 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7037 && REGNO (XEXP (x, 0))
7038 <= LAST_VIRTUAL_POINTER_REGISTER))
7039 && GET_MODE_SIZE (mode) >= 4
7040 && CONST_INT_P (XEXP (x, 1))
7041 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7042 return 1;
7045 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7046 && GET_MODE_SIZE (mode) == 4
7047 && GET_CODE (x) == SYMBOL_REF
7048 && CONSTANT_POOL_ADDRESS_P (x)
7049 && ! (flag_pic
7050 && symbol_mentioned_p (get_pool_constant (x))
7051 && ! pcrel_constant_p (get_pool_constant (x))))
7052 return 1;
7054 return 0;
7057 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7058 instruction of mode MODE. */
7060 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7062 switch (GET_MODE_SIZE (mode))
7064 case 1:
7065 return val >= 0 && val < 32;
7067 case 2:
7068 return val >= 0 && val < 64 && (val & 1) == 0;
7070 default:
7071 return (val >= 0
7072 && (val + GET_MODE_SIZE (mode)) <= 128
7073 && (val & 3) == 0);
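/* Editor's note (illustrative, not part of the original source): the
   ranges above correspond to the Thumb-1 load/store immediate fields:
   ldrb/strb take a byte offset of 0-31, ldrh/strh an even offset of
   0-62, and ldr/str a word-aligned offset of 0-124 for SImode (0-1020
   from SP, which is checked separately in thumb1_legitimate_address_p).  */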
7077 bool
7078 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7080 if (TARGET_ARM)
7081 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7082 else if (TARGET_THUMB2)
7083 return thumb2_legitimate_address_p (mode, x, strict_p);
7084 else /* if (TARGET_THUMB1) */
7085 return thumb1_legitimate_address_p (mode, x, strict_p);
7088 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7090 Given an rtx X being reloaded into a reg required to be
7091 in class CLASS, return the class of reg to actually use.
7092 In general this is just CLASS, but for the Thumb core registers and
7093 immediate constants we prefer a LO_REGS class or a subset. */
7095 static reg_class_t
7096 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7098 if (TARGET_32BIT)
7099 return rclass;
7100 else
7102 if (rclass == GENERAL_REGS)
7103 return LO_REGS;
7104 else
7105 return rclass;
7109 /* Build the SYMBOL_REF for __tls_get_addr. */
7111 static GTY(()) rtx tls_get_addr_libfunc;
7113 static rtx
7114 get_tls_get_addr (void)
7116 if (!tls_get_addr_libfunc)
7117 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7118 return tls_get_addr_libfunc;
7122 arm_load_tp (rtx target)
7124 if (!target)
7125 target = gen_reg_rtx (SImode);
7127 if (TARGET_HARD_TP)
7129 /* Can return in any reg. */
7130 emit_insn (gen_load_tp_hard (target));
7132 else
7134 /* Always returned in r0. Immediately copy the result into a pseudo,
7135 otherwise other uses of r0 (e.g. setting up function arguments) may
7136 clobber the value. */
7138 rtx tmp;
7140 emit_insn (gen_load_tp_soft ());
7142 tmp = gen_rtx_REG (SImode, 0);
7143 emit_move_insn (target, tmp);
7145 return target;
7148 static rtx
7149 load_tls_operand (rtx x, rtx reg)
7151 rtx tmp;
7153 if (reg == NULL_RTX)
7154 reg = gen_reg_rtx (SImode);
7156 tmp = gen_rtx_CONST (SImode, x);
7158 emit_move_insn (reg, tmp);
7160 return reg;
7163 static rtx
7164 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7166 rtx insns, label, labelno, sum;
7168 gcc_assert (reloc != TLS_DESCSEQ);
7169 start_sequence ();
7171 labelno = GEN_INT (pic_labelno++);
7172 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7173 label = gen_rtx_CONST (VOIDmode, label);
7175 sum = gen_rtx_UNSPEC (Pmode,
7176 gen_rtvec (4, x, GEN_INT (reloc), label,
7177 GEN_INT (TARGET_ARM ? 8 : 4)),
7178 UNSPEC_TLS);
7179 reg = load_tls_operand (sum, reg);
7181 if (TARGET_ARM)
7182 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7183 else
7184 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7186 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7187 LCT_PURE, /* LCT_CONST? */
7188 Pmode, 1, reg, Pmode);
7190 insns = get_insns ();
7191 end_sequence ();
7193 return insns;
7196 static rtx
7197 arm_tls_descseq_addr (rtx x, rtx reg)
7199 rtx labelno = GEN_INT (pic_labelno++);
7200 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7201 rtx sum = gen_rtx_UNSPEC (Pmode,
7202 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7203 gen_rtx_CONST (VOIDmode, label),
7204 GEN_INT (!TARGET_ARM)),
7205 UNSPEC_TLS);
7206 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7208 emit_insn (gen_tlscall (x, labelno));
7209 if (!reg)
7210 reg = gen_reg_rtx (SImode);
7211 else
7212 gcc_assert (REGNO (reg) != 0);
7214 emit_move_insn (reg, reg0);
7216 return reg;
7220 legitimize_tls_address (rtx x, rtx reg)
7222 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7223 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7225 switch (model)
7227 case TLS_MODEL_GLOBAL_DYNAMIC:
7228 if (TARGET_GNU2_TLS)
7230 reg = arm_tls_descseq_addr (x, reg);
7232 tp = arm_load_tp (NULL_RTX);
7234 dest = gen_rtx_PLUS (Pmode, tp, reg);
7236 else
7238 /* Original scheme */
7239 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7240 dest = gen_reg_rtx (Pmode);
7241 emit_libcall_block (insns, dest, ret, x);
7243 return dest;
7245 case TLS_MODEL_LOCAL_DYNAMIC:
7246 if (TARGET_GNU2_TLS)
7248 reg = arm_tls_descseq_addr (x, reg);
7250 tp = arm_load_tp (NULL_RTX);
7252 dest = gen_rtx_PLUS (Pmode, tp, reg);
7254 else
7256 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7258 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7259 share the LDM result with other LD model accesses. */
7260 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7261 UNSPEC_TLS);
7262 dest = gen_reg_rtx (Pmode);
7263 emit_libcall_block (insns, dest, ret, eqv);
7265 /* Load the addend. */
7266 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7267 GEN_INT (TLS_LDO32)),
7268 UNSPEC_TLS);
7269 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7270 dest = gen_rtx_PLUS (Pmode, dest, addend);
7272 return dest;
7274 case TLS_MODEL_INITIAL_EXEC:
7275 labelno = GEN_INT (pic_labelno++);
7276 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7277 label = gen_rtx_CONST (VOIDmode, label);
7278 sum = gen_rtx_UNSPEC (Pmode,
7279 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7280 GEN_INT (TARGET_ARM ? 8 : 4)),
7281 UNSPEC_TLS);
7282 reg = load_tls_operand (sum, reg);
7284 if (TARGET_ARM)
7285 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7286 else if (TARGET_THUMB2)
7287 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7288 else
7290 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7291 emit_move_insn (reg, gen_const_mem (SImode, reg));
7294 tp = arm_load_tp (NULL_RTX);
7296 return gen_rtx_PLUS (Pmode, tp, reg);
7298 case TLS_MODEL_LOCAL_EXEC:
7299 tp = arm_load_tp (NULL_RTX);
7301 reg = gen_rtx_UNSPEC (Pmode,
7302 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7303 UNSPEC_TLS);
7304 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7306 return gen_rtx_PLUS (Pmode, tp, reg);
7308 default:
7309 abort ();
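/* Editor's note (illustrative, not part of the original source): as a
   rough guide to the models above, an access to

     __thread int counter;

   compiled with -fPIC for a shared library uses the global- or
   local-dynamic sequences (a call to __tls_get_addr); initial-exec
   (for example via -ftls-model=initial-exec) loads the offset from the
   GOT and adds the thread pointer; and a non-PIC executable can use
   local-exec, which is just thread pointer + constant offset
   (TLS_LE32).  */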
7313 /* Try machine-dependent ways of modifying an illegitimate address
7314 to be legitimate. If we find one, return the new, valid address. */
7316 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7318 if (!TARGET_ARM)
7320 /* TODO: legitimize_address for Thumb2. */
7321 if (TARGET_THUMB2)
7322 return x;
7323 return thumb_legitimize_address (x, orig_x, mode);
7326 if (arm_tls_symbol_p (x))
7327 return legitimize_tls_address (x, NULL_RTX);
7329 if (GET_CODE (x) == PLUS)
7331 rtx xop0 = XEXP (x, 0);
7332 rtx xop1 = XEXP (x, 1);
7334 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7335 xop0 = force_reg (SImode, xop0);
7337 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7338 && !symbol_mentioned_p (xop1))
7339 xop1 = force_reg (SImode, xop1);
7341 if (ARM_BASE_REGISTER_RTX_P (xop0)
7342 && CONST_INT_P (xop1))
7344 HOST_WIDE_INT n, low_n;
7345 rtx base_reg, val;
7346 n = INTVAL (xop1);
7348 /* VFP addressing modes actually allow greater offsets, but for
7349 now we just stick with the lowest common denominator. */
7350 if (mode == DImode
7351 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7353 low_n = n & 0x0f;
7354 n &= ~0x0f;
7355 if (low_n > 4)
7357 n += 16;
7358 low_n -= 16;
7361 else
7363 low_n = ((mode) == TImode ? 0
7364 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7365 n -= low_n;
7368 base_reg = gen_reg_rtx (SImode);
7369 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7370 emit_move_insn (base_reg, val);
7371 x = plus_constant (Pmode, base_reg, low_n);
7373 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7374 x = gen_rtx_PLUS (SImode, xop0, xop1);
7377 /* XXX We don't allow MINUS any more -- see comment in
7378 arm_legitimate_address_outer_p (). */
7379 else if (GET_CODE (x) == MINUS)
7381 rtx xop0 = XEXP (x, 0);
7382 rtx xop1 = XEXP (x, 1);
7384 if (CONSTANT_P (xop0))
7385 xop0 = force_reg (SImode, xop0);
7387 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7388 xop1 = force_reg (SImode, xop1);
7390 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7391 x = gen_rtx_MINUS (SImode, xop0, xop1);
7394 /* Make sure to take full advantage of the pre-indexed addressing mode
7395 with absolute addresses which often allows for the base register to
7396 be factorized for multiple adjacent memory references, and it might
7397 even allow for the mini pool to be avoided entirely. */
7398 else if (CONST_INT_P (x) && optimize > 0)
7400 unsigned int bits;
7401 HOST_WIDE_INT mask, base, index;
7402 rtx base_reg;
7404 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7405 use an 8-bit index. So let's use a 12-bit index for SImode only and
7406 hope that arm_gen_constant will enable ldrb to use more bits. */
7407 bits = (mode == SImode) ? 12 : 8;
7408 mask = (1 << bits) - 1;
7409 base = INTVAL (x) & ~mask;
7410 index = INTVAL (x) & mask;
7411 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7413 /* It'll most probably be more efficient to generate the base
7414 with more bits set and use a negative index instead. */
7415 base |= mask;
7416 index -= mask;
7418 base_reg = force_reg (SImode, GEN_INT (base));
7419 x = plus_constant (Pmode, base_reg, index);
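/* Illustrative worked example (editorial, not from the original source):
   for a SImode access to the absolute address 0x00fff004 with bits == 12,
   mask = 0xfff gives base = 0x00fff000 and index = 4.  Since
   bit_count (0x00fff000) == 12 > (32 - 12)/2 == 10, the heuristic switches
   to base = 0x00ffffff and index = 4 - 0xfff = -4091; base + index is still
   0x00fff004, but the new base can be formed with a single MVN of
   0xff000000 while -4091 remains a valid 12-bit load offset.  */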
7422 if (flag_pic)
7424 /* We need to find and carefully transform any SYMBOL and LABEL
7425 references; so go back to the original address expression. */
7426 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7428 if (new_x != orig_x)
7429 x = new_x;
7432 return x;
7436 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7437 to be legitimate. If we find one, return the new, valid address. */
7439 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7441 if (arm_tls_symbol_p (x))
7442 return legitimize_tls_address (x, NULL_RTX);
7444 if (GET_CODE (x) == PLUS
7445 && CONST_INT_P (XEXP (x, 1))
7446 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7447 || INTVAL (XEXP (x, 1)) < 0))
7449 rtx xop0 = XEXP (x, 0);
7450 rtx xop1 = XEXP (x, 1);
7451 HOST_WIDE_INT offset = INTVAL (xop1);
7453 /* Try to fold the offset into a biasing of the base register and
7454 then offsetting that. Don't do this when optimizing for space
7455 since it can cause too many CSEs. */
7456 if (optimize_size && offset >= 0
7457 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7459 HOST_WIDE_INT delta;
7461 if (offset >= 256)
7462 delta = offset - (256 - GET_MODE_SIZE (mode));
7463 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7464 delta = 31 * GET_MODE_SIZE (mode);
7465 else
7466 delta = offset & (~31 * GET_MODE_SIZE (mode));
7468 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7469 NULL_RTX);
7470 x = plus_constant (Pmode, xop0, delta);
7472 else if (offset < 0 && offset > -256)
7473 /* Small negative offsets are best done with a subtract before the
7474 dereference, since forcing them into a register normally takes two
7475 instructions. */
7476 x = force_operand (x, NULL_RTX);
7477 else
7479 /* For the remaining cases, force the constant into a register. */
7480 xop1 = force_reg (SImode, xop1);
7481 x = gen_rtx_PLUS (SImode, xop0, xop1);
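/* Illustrative example (editorial): with mode == SImode (size 4) and
   offset == 260 when optimizing for size, offset >= 256 so
   delta = 260 - (256 - 4) = 8; the base register is biased by
   260 - 8 = 252 and the load then uses the offset 8, which is directly
   encodable (Thumb-1 word loads allow offsets 0..124).  */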
7484 else if (GET_CODE (x) == PLUS
7485 && s_register_operand (XEXP (x, 1), SImode)
7486 && !s_register_operand (XEXP (x, 0), SImode))
7488 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7490 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7493 if (flag_pic)
7495 /* We need to find and carefully transform any SYMBOL and LABEL
7496 references; so go back to the original address expression. */
7497 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7499 if (new_x != orig_x)
7500 x = new_x;
7503 return x;
7506 bool
7507 arm_legitimize_reload_address (rtx *p,
7508 enum machine_mode mode,
7509 int opnum, int type,
7510 int ind_levels ATTRIBUTE_UNUSED)
7512 /* We must recognize output that we have already generated ourselves. */
7513 if (GET_CODE (*p) == PLUS
7514 && GET_CODE (XEXP (*p, 0)) == PLUS
7515 && REG_P (XEXP (XEXP (*p, 0), 0))
7516 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7517 && CONST_INT_P (XEXP (*p, 1)))
7519 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7520 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7521 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7522 return true;
7525 if (GET_CODE (*p) == PLUS
7526 && REG_P (XEXP (*p, 0))
7527 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7528 /* If the base register is equivalent to a constant, let the generic
7529 code handle it. Otherwise we will run into problems if a future
7530 reload pass decides to rematerialize the constant. */
7531 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7532 && CONST_INT_P (XEXP (*p, 1)))
7534 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7535 HOST_WIDE_INT low, high;
7537 /* Detect coprocessor load/stores. */
7538 bool coproc_p = ((TARGET_HARD_FLOAT
7539 && TARGET_VFP
7540 && (mode == SFmode || mode == DFmode))
7541 || (TARGET_REALLY_IWMMXT
7542 && VALID_IWMMXT_REG_MODE (mode))
7543 || (TARGET_NEON
7544 && (VALID_NEON_DREG_MODE (mode)
7545 || VALID_NEON_QREG_MODE (mode))));
7547 /* For some conditions, bail out when the lower two bits are unaligned. */
7548 if ((val & 0x3) != 0
7549 /* Coprocessor load/store indexes are 8 bits with '00' appended. */
7550 && (coproc_p
7551 /* For DI, and DF under soft-float: */
7552 || ((mode == DImode || mode == DFmode)
7553 /* Without ldrd, we use stm/ldm, which do not
7554 fare well with unaligned addresses. */
7555 && (! TARGET_LDRD
7556 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7557 || TARGET_THUMB2))))
7558 return false;
7560 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7561 of which the (reg+high) gets turned into a reload add insn,
7562 we try to decompose the index into high/low values that can often
7563 also lead to better reload CSE.
7564 For example:
7565 ldr r0, [r2, #4100] // Offset too large
7566 ldr r1, [r2, #4104] // Offset too large
7568 is best reloaded as:
7569 add t1, r2, #4096
7570 ldr r0, [t1, #4]
7571 add t2, r2, #4096
7572 ldr r1, [t2, #8]
7574 which post-reload CSE can simplify in most cases to eliminate the
7575 second add instruction:
7576 add t1, r2, #4096
7577 ldr r0, [t1, #4]
7578 ldr r1, [t1, #8]
7580 The idea here is that we want to split out the bits of the constant
7581 as a mask, rather than by subtracting the maximum offset that the
7582 respective type of load/store used can handle.
7584 Negative low offsets can still be used even when the overall offset
7585 is positive; sometimes this can lead to an immediate that can be
7586 constructed with fewer instructions.
7587 For example:
7588 ldr r0, [r2, #0x3FFFFC]
7590 This is best reloaded as:
7591 add t1, r2, #0x400000
7592 ldr r0, [t1, #-4]
7594 The trick for spotting this for a load insn with N bits of offset
7595 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7596 negative offset that is going to make bit N and all the bits below
7597 it become zero in the remainder part.
7599 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7600 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7601 used in most cases of ARM load/store instructions. */
7603 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7604 (((VAL) & ((1 << (N)) - 1)) \
7605 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7606 : 0)
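/* Worked trace of the macro (editorial): for the example above,
   VAL = 0x3FFFFC with N = 12 gives (VAL & 0x1fff) = 0x1ffc, and
   (0x1ffc ^ 0x1000) - 0x1000 = -4; the high part is then
   0x3FFFFC - (-4) = 0x400000, matching the reload shown in the comment.  */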
7608 if (coproc_p)
7610 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7612 /* NEON quad-word load/stores are made of two double-word accesses,
7613 so the valid index range is reduced by 8. Treat as 9-bit range if
7614 we go over it. */
7615 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7616 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7618 else if (GET_MODE_SIZE (mode) == 8)
7620 if (TARGET_LDRD)
7621 low = (TARGET_THUMB2
7622 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7623 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7624 else
7625 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7626 to access doublewords. The supported load/store offsets are
7627 -8, -4, and 4, which we try to produce here. */
7628 low = ((val & 0xf) ^ 0x8) - 0x8;
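/* Editorial illustration: ((val & 0xf) ^ 0x8) - 0x8 maps the low nibble
   into the supported offsets, e.g. val = 0x10c gives low = (0xc ^ 0x8) - 0x8
   = -4 (and hence high = 0x110), a low nibble of 0x4 gives low = 4, and a
   low nibble of 0x8 gives low = -8.  */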
7630 else if (GET_MODE_SIZE (mode) < 8)
7632 /* NEON element load/stores do not have an offset. */
7633 if (TARGET_NEON_FP16 && mode == HFmode)
7634 return false;
7636 if (TARGET_THUMB2)
7638 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7639 Try the wider 12-bit range first, and re-try if the result
7640 is out of range. */
7641 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7642 if (low < -255)
7643 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7645 else
7647 if (mode == HImode || mode == HFmode)
7649 if (arm_arch4)
7650 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7651 else
7653 /* The storehi/movhi_bytes fallbacks can use only
7654 [-4094,+4094] of the full ldrb/strb index range. */
7655 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7656 if (low == 4095 || low == -4095)
7657 return false;
7660 else
7661 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7664 else
7665 return false;
7667 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7668 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7669 - (unsigned HOST_WIDE_INT) 0x80000000);
7670 /* Check for overflow or zero */
7671 if (low == 0 || high == 0 || (high + low != val))
7672 return false;
7674 /* Reload the high part into a base reg; leave the low part
7675 in the mem.
7676 Note that replacing this gen_rtx_PLUS with plus_constant is
7677 wrong in this case because we rely on the
7678 (plus (plus reg c1) c2) structure being preserved so that
7679 XEXP (*p, 0) in push_reload below uses the correct term. */
7680 *p = gen_rtx_PLUS (GET_MODE (*p),
7681 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7682 GEN_INT (high)),
7683 GEN_INT (low));
7684 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7685 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7686 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7687 return true;
7690 return false;
7694 thumb_legitimize_reload_address (rtx *x_p,
7695 enum machine_mode mode,
7696 int opnum, int type,
7697 int ind_levels ATTRIBUTE_UNUSED)
7699 rtx x = *x_p;
7701 if (GET_CODE (x) == PLUS
7702 && GET_MODE_SIZE (mode) < 4
7703 && REG_P (XEXP (x, 0))
7704 && XEXP (x, 0) == stack_pointer_rtx
7705 && CONST_INT_P (XEXP (x, 1))
7706 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7708 rtx orig_x = x;
7710 x = copy_rtx (x);
7711 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7712 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7713 return x;
7716 /* If both registers are hi-regs, then it's better to reload the
7717 entire expression rather than each register individually. That
7718 only requires one reload register rather than two. */
7719 if (GET_CODE (x) == PLUS
7720 && REG_P (XEXP (x, 0))
7721 && REG_P (XEXP (x, 1))
7722 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7723 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7725 rtx orig_x = x;
7727 x = copy_rtx (x);
7728 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7729 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7730 return x;
7733 return NULL;
7736 /* Test for various thread-local symbols. */
7738 /* Return TRUE if X is a thread-local symbol. */
7740 static bool
7741 arm_tls_symbol_p (rtx x)
7743 if (! TARGET_HAVE_TLS)
7744 return false;
7746 if (GET_CODE (x) != SYMBOL_REF)
7747 return false;
7749 return SYMBOL_REF_TLS_MODEL (x) != 0;
7752 /* Helper for arm_tls_referenced_p. */
7754 static int
7755 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7757 if (GET_CODE (*x) == SYMBOL_REF)
7758 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7760 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7761 TLS offsets, not real symbol references. */
7762 if (GET_CODE (*x) == UNSPEC
7763 && XINT (*x, 1) == UNSPEC_TLS)
7764 return -1;
7766 return 0;
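/* Note on the callback protocol: for_each_rtx treats a return value of 0
   as "continue", -1 as "do not recurse into the sub-rtxes of this
   expression" (used above for UNSPEC_TLS), and any other nonzero value as
   "stop and return this value".  */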
7769 /* Return TRUE if X contains any TLS symbol references. */
7771 bool
7772 arm_tls_referenced_p (rtx x)
7774 if (! TARGET_HAVE_TLS)
7775 return false;
7777 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7780 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7782 On the ARM, allow any integer (invalid ones are removed later by insn
7783 patterns), nice doubles and symbol_refs which refer to the function's
7784 constant pool XXX.
7786 When generating PIC, allow anything. */
7788 static bool
7789 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7791 /* At present, we have no support for Neon structure constants, so forbid
7792 them here. It might be possible to handle simple cases like 0 and -1
7793 in future. */
7794 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7795 return false;
7797 return flag_pic || !label_mentioned_p (x);
7800 static bool
7801 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7803 return (CONST_INT_P (x)
7804 || CONST_DOUBLE_P (x)
7805 || CONSTANT_ADDRESS_P (x)
7806 || flag_pic);
7809 static bool
7810 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7812 return (!arm_cannot_force_const_mem (mode, x)
7813 && (TARGET_32BIT
7814 ? arm_legitimate_constant_p_1 (mode, x)
7815 : thumb_legitimate_constant_p (mode, x)));
7818 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7820 static bool
7821 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7823 rtx base, offset;
7825 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7827 split_const (x, &base, &offset);
7828 if (GET_CODE (base) == SYMBOL_REF
7829 && !offset_within_block_p (base, INTVAL (offset)))
7830 return true;
7832 return arm_tls_referenced_p (x);
7835 #define REG_OR_SUBREG_REG(X) \
7836 (REG_P (X) \
7837 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7839 #define REG_OR_SUBREG_RTX(X) \
7840 (REG_P (X) ? (X) : SUBREG_REG (X))
7842 static inline int
7843 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7845 enum machine_mode mode = GET_MODE (x);
7846 int total, words;
7848 switch (code)
7850 case ASHIFT:
7851 case ASHIFTRT:
7852 case LSHIFTRT:
7853 case ROTATERT:
7854 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7856 case PLUS:
7857 case MINUS:
7858 case COMPARE:
7859 case NEG:
7860 case NOT:
7861 return COSTS_N_INSNS (1);
7863 case MULT:
7864 if (CONST_INT_P (XEXP (x, 1)))
7866 int cycles = 0;
7867 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7869 while (i)
7871 i >>= 2;
7872 cycles++;
7874 return COSTS_N_INSNS (2) + cycles;
7876 return COSTS_N_INSNS (1) + 16;
7878 case SET:
7879 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7880 the mode. */
7881 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7882 return (COSTS_N_INSNS (words)
7883 + 4 * ((MEM_P (SET_SRC (x)))
7884 + MEM_P (SET_DEST (x))));
7886 case CONST_INT:
7887 if (outer == SET)
7889 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7890 return 0;
7891 if (thumb_shiftable_const (INTVAL (x)))
7892 return COSTS_N_INSNS (2);
7893 return COSTS_N_INSNS (3);
7895 else if ((outer == PLUS || outer == COMPARE)
7896 && INTVAL (x) < 256 && INTVAL (x) > -256)
7897 return 0;
7898 else if ((outer == IOR || outer == XOR || outer == AND)
7899 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7900 return COSTS_N_INSNS (1);
7901 else if (outer == AND)
7903 int i;
7904 /* This duplicates the tests in the andsi3 expander. */
7905 for (i = 9; i <= 31; i++)
7906 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7907 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7908 return COSTS_N_INSNS (2);
7910 else if (outer == ASHIFT || outer == ASHIFTRT
7911 || outer == LSHIFTRT)
7912 return 0;
7913 return COSTS_N_INSNS (2);
7915 case CONST:
7916 case CONST_DOUBLE:
7917 case LABEL_REF:
7918 case SYMBOL_REF:
7919 return COSTS_N_INSNS (3);
7921 case UDIV:
7922 case UMOD:
7923 case DIV:
7924 case MOD:
7925 return 100;
7927 case TRUNCATE:
7928 return 99;
7930 case AND:
7931 case XOR:
7932 case IOR:
7933 /* XXX guess. */
7934 return 8;
7936 case MEM:
7937 /* XXX another guess. */
7938 /* Memory costs quite a lot for the first word, but subsequent words
7939 load at the equivalent of a single insn each. */
7940 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7941 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7942 ? 4 : 0));
7944 case IF_THEN_ELSE:
7945 /* XXX a guess. */
7946 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7947 return 14;
7948 return 2;
7950 case SIGN_EXTEND:
7951 case ZERO_EXTEND:
7952 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7953 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7955 if (mode == SImode)
7956 return total;
7958 if (arm_arch6)
7959 return total + COSTS_N_INSNS (1);
7961 /* Assume a two-shift sequence. Increase the cost slightly so
7962 we prefer actual shifts over an extend operation. */
7963 return total + 1 + COSTS_N_INSNS (2);
7965 default:
7966 return 99;
7970 static inline bool
7971 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7973 enum machine_mode mode = GET_MODE (x);
7974 enum rtx_code subcode;
7975 rtx operand;
7976 enum rtx_code code = GET_CODE (x);
7977 *total = 0;
7979 switch (code)
7981 case MEM:
7982 /* Memory costs quite a lot for the first word, but subsequent words
7983 load at the equivalent of a single insn each. */
7984 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7985 return true;
7987 case DIV:
7988 case MOD:
7989 case UDIV:
7990 case UMOD:
7991 if (TARGET_HARD_FLOAT && mode == SFmode)
7992 *total = COSTS_N_INSNS (2);
7993 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7994 *total = COSTS_N_INSNS (4);
7995 else
7996 *total = COSTS_N_INSNS (20);
7997 return false;
7999 case ROTATE:
8000 if (REG_P (XEXP (x, 1)))
8001 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8002 else if (!CONST_INT_P (XEXP (x, 1)))
8003 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8005 /* Fall through */
8006 case ROTATERT:
8007 if (mode != SImode)
8009 *total += COSTS_N_INSNS (4);
8010 return true;
8013 /* Fall through */
8014 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8015 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8016 if (mode == DImode)
8018 *total += COSTS_N_INSNS (3);
8019 return true;
8022 *total += COSTS_N_INSNS (1);
8023 /* Increase the cost of complex shifts because they aren't any faster,
8024 and reduce dual issue opportunities. */
8025 if (arm_tune_cortex_a9
8026 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8027 ++*total;
8029 return true;
8031 case MINUS:
8032 if (mode == DImode)
8034 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8035 if (CONST_INT_P (XEXP (x, 0))
8036 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8038 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8039 return true;
8042 if (CONST_INT_P (XEXP (x, 1))
8043 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8045 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8046 return true;
8049 return false;
8052 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8054 if (TARGET_HARD_FLOAT
8055 && (mode == SFmode
8056 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8058 *total = COSTS_N_INSNS (1);
8059 if (CONST_DOUBLE_P (XEXP (x, 0))
8060 && arm_const_double_rtx (XEXP (x, 0)))
8062 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8063 return true;
8066 if (CONST_DOUBLE_P (XEXP (x, 1))
8067 && arm_const_double_rtx (XEXP (x, 1)))
8069 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8070 return true;
8073 return false;
8075 *total = COSTS_N_INSNS (20);
8076 return false;
8079 *total = COSTS_N_INSNS (1);
8080 if (CONST_INT_P (XEXP (x, 0))
8081 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8083 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8084 return true;
8087 subcode = GET_CODE (XEXP (x, 1));
8088 if (subcode == ASHIFT || subcode == ASHIFTRT
8089 || subcode == LSHIFTRT
8090 || subcode == ROTATE || subcode == ROTATERT)
8092 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8093 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8094 return true;
8097 /* A shift as a part of RSB costs no more than RSB itself. */
8098 if (GET_CODE (XEXP (x, 0)) == MULT
8099 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8101 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8102 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8103 return true;
8106 if (subcode == MULT
8107 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8109 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8110 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8111 return true;
8114 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8115 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8117 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8118 if (REG_P (XEXP (XEXP (x, 1), 0))
8119 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8120 *total += COSTS_N_INSNS (1);
8122 return true;
8125 /* Fall through */
8127 case PLUS:
8128 if (code == PLUS && arm_arch6 && mode == SImode
8129 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8130 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8132 *total = COSTS_N_INSNS (1);
8133 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8134 0, speed);
8135 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8136 return true;
8139 /* MLA: All arguments must be registers. We filter out
8140 multiplication by a power of two, so that we fall through to
8141 the code below. */
8142 if (GET_CODE (XEXP (x, 0)) == MULT
8143 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8145 /* The cost comes from the cost of the multiply. */
8146 return false;
8149 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8151 if (TARGET_HARD_FLOAT
8152 && (mode == SFmode
8153 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8155 *total = COSTS_N_INSNS (1);
8156 if (CONST_DOUBLE_P (XEXP (x, 1))
8157 && arm_const_double_rtx (XEXP (x, 1)))
8159 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8160 return true;
8163 return false;
8166 *total = COSTS_N_INSNS (20);
8167 return false;
8170 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8171 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8173 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8174 if (REG_P (XEXP (XEXP (x, 0), 0))
8175 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8176 *total += COSTS_N_INSNS (1);
8177 return true;
8180 /* Fall through */
8182 case AND: case XOR: case IOR:
8184 /* Normally the frame registers will be split into reg+const during
8185 reload, so it is a bad idea to combine them with other instructions,
8186 since then they might not be moved outside of loops. As a compromise
8187 we allow integration with ops that have a constant as their second
8188 operand. */
8189 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8190 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8191 && !CONST_INT_P (XEXP (x, 1)))
8192 *total = COSTS_N_INSNS (1);
8194 if (mode == DImode)
8196 *total += COSTS_N_INSNS (2);
8197 if (CONST_INT_P (XEXP (x, 1))
8198 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8200 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8201 return true;
8204 return false;
8207 *total += COSTS_N_INSNS (1);
8208 if (CONST_INT_P (XEXP (x, 1))
8209 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8211 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8212 return true;
8214 subcode = GET_CODE (XEXP (x, 0));
8215 if (subcode == ASHIFT || subcode == ASHIFTRT
8216 || subcode == LSHIFTRT
8217 || subcode == ROTATE || subcode == ROTATERT)
8219 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8220 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8221 return true;
8224 if (subcode == MULT
8225 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8227 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8228 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8229 return true;
8232 if (subcode == UMIN || subcode == UMAX
8233 || subcode == SMIN || subcode == SMAX)
8235 *total = COSTS_N_INSNS (3);
8236 return true;
8239 return false;
8241 case MULT:
8242 /* This should have been handled by the CPU specific routines. */
8243 gcc_unreachable ();
8245 case TRUNCATE:
8246 if (arm_arch3m && mode == SImode
8247 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8248 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8249 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8250 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8251 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8252 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8254 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8255 return true;
8257 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8258 return false;
8260 case NEG:
8261 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8263 if (TARGET_HARD_FLOAT
8264 && (mode == SFmode
8265 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8267 *total = COSTS_N_INSNS (1);
8268 return false;
8270 *total = COSTS_N_INSNS (2);
8271 return false;
8274 /* Fall through */
8275 case NOT:
8276 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8277 if (mode == SImode && code == NOT)
8279 subcode = GET_CODE (XEXP (x, 0));
8280 if (subcode == ASHIFT || subcode == ASHIFTRT
8281 || subcode == LSHIFTRT
8282 || subcode == ROTATE || subcode == ROTATERT
8283 || (subcode == MULT
8284 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8286 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8287 /* Register shifts cost an extra cycle. */
8288 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8289 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8290 subcode, 1, speed);
8291 return true;
8295 return false;
8297 case IF_THEN_ELSE:
8298 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8300 *total = COSTS_N_INSNS (4);
8301 return true;
8304 operand = XEXP (x, 0);
8306 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8307 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8308 && REG_P (XEXP (operand, 0))
8309 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8310 *total += COSTS_N_INSNS (1);
8311 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8312 + rtx_cost (XEXP (x, 2), code, 2, speed));
8313 return true;
8315 case NE:
8316 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8318 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8319 return true;
8321 goto scc_insn;
8323 case GE:
8324 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8325 && mode == SImode && XEXP (x, 1) == const0_rtx)
8327 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8328 return true;
8330 goto scc_insn;
8332 case LT:
8333 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8334 && mode == SImode && XEXP (x, 1) == const0_rtx)
8336 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8337 return true;
8339 goto scc_insn;
8341 case EQ:
8342 case GT:
8343 case LE:
8344 case GEU:
8345 case LTU:
8346 case GTU:
8347 case LEU:
8348 case UNORDERED:
8349 case ORDERED:
8350 case UNEQ:
8351 case UNGE:
8352 case UNLT:
8353 case UNGT:
8354 case UNLE:
8355 scc_insn:
8356 /* SCC insns. If the comparison has already been performed, they
8357 cost 2 instructions. Otherwise they need an additional
8358 comparison before them. */
8359 *total = COSTS_N_INSNS (2);
8360 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8362 return true;
8365 /* Fall through */
8366 case COMPARE:
8367 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8369 *total = 0;
8370 return true;
8373 *total += COSTS_N_INSNS (1);
8374 if (CONST_INT_P (XEXP (x, 1))
8375 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8377 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8378 return true;
8381 subcode = GET_CODE (XEXP (x, 0));
8382 if (subcode == ASHIFT || subcode == ASHIFTRT
8383 || subcode == LSHIFTRT
8384 || subcode == ROTATE || subcode == ROTATERT)
8386 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8387 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8388 return true;
8391 if (subcode == MULT
8392 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8394 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8395 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8396 return true;
8399 return false;
8401 case UMIN:
8402 case UMAX:
8403 case SMIN:
8404 case SMAX:
8405 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8406 if (!CONST_INT_P (XEXP (x, 1))
8407 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8408 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8409 return true;
8411 case ABS:
8412 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8414 if (TARGET_HARD_FLOAT
8415 && (mode == SFmode
8416 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8418 *total = COSTS_N_INSNS (1);
8419 return false;
8421 *total = COSTS_N_INSNS (20);
8422 return false;
8424 *total = COSTS_N_INSNS (1);
8425 if (mode == DImode)
8426 *total += COSTS_N_INSNS (3);
8427 return false;
8429 case SIGN_EXTEND:
8430 case ZERO_EXTEND:
8431 *total = 0;
8432 if (GET_MODE_CLASS (mode) == MODE_INT)
8434 rtx op = XEXP (x, 0);
8435 enum machine_mode opmode = GET_MODE (op);
8437 if (mode == DImode)
8438 *total += COSTS_N_INSNS (1);
8440 if (opmode != SImode)
8442 if (MEM_P (op))
8444 /* If !arm_arch4, we use one of the extendhisi2_mem
8445 or movhi_bytes patterns for HImode. For a QImode
8446 sign extension, we first zero-extend from memory
8447 and then perform a shift sequence. */
8448 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8449 *total += COSTS_N_INSNS (2);
8451 else if (arm_arch6)
8452 *total += COSTS_N_INSNS (1);
8454 /* We don't have the necessary insn, so we need to perform some
8455 other operation. */
8456 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8457 /* An and with constant 255. */
8458 *total += COSTS_N_INSNS (1);
8459 else
8460 /* A shift sequence. Increase costs slightly to avoid
8461 combining two shifts into an extend operation. */
8462 *total += COSTS_N_INSNS (2) + 1;
8465 return false;
8468 switch (GET_MODE (XEXP (x, 0)))
8470 case V8QImode:
8471 case V4HImode:
8472 case V2SImode:
8473 case V4QImode:
8474 case V2HImode:
8475 *total = COSTS_N_INSNS (1);
8476 return false;
8478 default:
8479 gcc_unreachable ();
8481 gcc_unreachable ();
8483 case ZERO_EXTRACT:
8484 case SIGN_EXTRACT:
8485 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8486 return true;
8488 case CONST_INT:
8489 if (const_ok_for_arm (INTVAL (x))
8490 || const_ok_for_arm (~INTVAL (x)))
8491 *total = COSTS_N_INSNS (1);
8492 else
8493 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8494 INTVAL (x), NULL_RTX,
8495 NULL_RTX, 0, 0));
8496 return true;
8498 case CONST:
8499 case LABEL_REF:
8500 case SYMBOL_REF:
8501 *total = COSTS_N_INSNS (3);
8502 return true;
8504 case HIGH:
8505 *total = COSTS_N_INSNS (1);
8506 return true;
8508 case LO_SUM:
8509 *total = COSTS_N_INSNS (1);
8510 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8511 return true;
8513 case CONST_DOUBLE:
8514 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8515 && (mode == SFmode || !TARGET_VFP_SINGLE))
8516 *total = COSTS_N_INSNS (1);
8517 else
8518 *total = COSTS_N_INSNS (4);
8519 return true;
8521 case SET:
8522 /* The vec_extract patterns accept memory operands that require an
8523 address reload. Account for the cost of that reload to give the
8524 auto-inc-dec pass an incentive to try to replace them. */
8525 if (TARGET_NEON && MEM_P (SET_DEST (x))
8526 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8528 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8529 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8530 *total += COSTS_N_INSNS (1);
8531 return true;
8533 /* Likewise for the vec_set patterns. */
8534 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8535 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8536 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8538 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8539 *total = rtx_cost (mem, code, 0, speed);
8540 if (!neon_vector_mem_operand (mem, 2, true))
8541 *total += COSTS_N_INSNS (1);
8542 return true;
8544 return false;
8546 case UNSPEC:
8547 /* We cost this as high as our memory costs so that it can
8548 be hoisted out of loops. */
8549 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8551 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8553 return true;
8555 case CONST_VECTOR:
8556 if (TARGET_NEON
8557 && TARGET_HARD_FLOAT
8558 && outer == SET
8559 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8560 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8561 *total = COSTS_N_INSNS (1);
8562 else
8563 *total = COSTS_N_INSNS (4);
8564 return true;
8566 default:
8567 *total = COSTS_N_INSNS (4);
8568 return false;
8572 /* Estimates the size cost of thumb1 instructions.
8573 For now most of the code is copied from thumb1_rtx_costs. We need
8574 finer-grained tuning when we have more related test cases. */
8575 static inline int
8576 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8578 enum machine_mode mode = GET_MODE (x);
8579 int words;
8581 switch (code)
8583 case ASHIFT:
8584 case ASHIFTRT:
8585 case LSHIFTRT:
8586 case ROTATERT:
8587 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8589 case PLUS:
8590 case MINUS:
8591 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8592 patterns defined by RTL expansion, especially for the expansion of
8593 multiplication. */
8594 if ((GET_CODE (XEXP (x, 0)) == MULT
8595 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8596 || (GET_CODE (XEXP (x, 1)) == MULT
8597 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8598 return COSTS_N_INSNS (2);
8599 /* Deliberately fall through for normal RTXes. */
8600 case COMPARE:
8601 case NEG:
8602 case NOT:
8603 return COSTS_N_INSNS (1);
8605 case MULT:
8606 if (CONST_INT_P (XEXP (x, 1)))
8608 /* The Thumb-1 mul instruction can't operate on a constant; we must
8609 load it into a register first. */
8610 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8611 return COSTS_N_INSNS (1) + const_size;
8613 return COSTS_N_INSNS (1);
8615 case SET:
8616 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8617 the mode. */
8618 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8619 return (COSTS_N_INSNS (words)
8620 + 4 * ((MEM_P (SET_SRC (x)))
8621 + MEM_P (SET_DEST (x))));
8623 case CONST_INT:
8624 if (outer == SET)
8626 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8627 return COSTS_N_INSNS (1);
8628 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8629 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8630 return COSTS_N_INSNS (2);
8631 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8632 if (thumb_shiftable_const (INTVAL (x)))
8633 return COSTS_N_INSNS (2);
8634 return COSTS_N_INSNS (3);
8636 else if ((outer == PLUS || outer == COMPARE)
8637 && INTVAL (x) < 256 && INTVAL (x) > -256)
8638 return 0;
8639 else if ((outer == IOR || outer == XOR || outer == AND)
8640 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8641 return COSTS_N_INSNS (1);
8642 else if (outer == AND)
8644 int i;
8645 /* This duplicates the tests in the andsi3 expander. */
8646 for (i = 9; i <= 31; i++)
8647 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8648 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8649 return COSTS_N_INSNS (2);
8651 else if (outer == ASHIFT || outer == ASHIFTRT
8652 || outer == LSHIFTRT)
8653 return 0;
8654 return COSTS_N_INSNS (2);
8656 case CONST:
8657 case CONST_DOUBLE:
8658 case LABEL_REF:
8659 case SYMBOL_REF:
8660 return COSTS_N_INSNS (3);
8662 case UDIV:
8663 case UMOD:
8664 case DIV:
8665 case MOD:
8666 return 100;
8668 case TRUNCATE:
8669 return 99;
8671 case AND:
8672 case XOR:
8673 case IOR:
8674 /* XXX guess. */
8675 return 8;
8677 case MEM:
8678 /* XXX another guess. */
8679 /* Memory costs quite a lot for the first word, but subsequent words
8680 load at the equivalent of a single insn each. */
8681 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8682 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8683 ? 4 : 0));
8685 case IF_THEN_ELSE:
8686 /* XXX a guess. */
8687 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8688 return 14;
8689 return 2;
8691 case ZERO_EXTEND:
8692 /* XXX still guessing. */
8693 switch (GET_MODE (XEXP (x, 0)))
8695 case QImode:
8696 return (1 + (mode == DImode ? 4 : 0)
8697 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8699 case HImode:
8700 return (4 + (mode == DImode ? 4 : 0)
8701 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8703 case SImode:
8704 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8706 default:
8707 return 99;
8710 default:
8711 return 99;
8715 /* RTX costs when optimizing for size. */
8716 static bool
8717 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8718 int *total)
8720 enum machine_mode mode = GET_MODE (x);
8721 if (TARGET_THUMB1)
8723 *total = thumb1_size_rtx_costs (x, code, outer_code);
8724 return true;
8727 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8728 switch (code)
8730 case MEM:
8731 /* A memory access costs 1 insn if the mode is small or the address is
8732 a single register; otherwise it costs one insn per word. */
8733 if (REG_P (XEXP (x, 0)))
8734 *total = COSTS_N_INSNS (1);
8735 else if (flag_pic
8736 && GET_CODE (XEXP (x, 0)) == PLUS
8737 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8738 /* This will be split into two instructions.
8739 See arm.md:calculate_pic_address. */
8740 *total = COSTS_N_INSNS (2);
8741 else
8742 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8743 return true;
8745 case DIV:
8746 case MOD:
8747 case UDIV:
8748 case UMOD:
8749 /* Needs a libcall, so it costs about this. */
8750 *total = COSTS_N_INSNS (2);
8751 return false;
8753 case ROTATE:
8754 if (mode == SImode && REG_P (XEXP (x, 1)))
8756 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8757 return true;
8759 /* Fall through */
8760 case ROTATERT:
8761 case ASHIFT:
8762 case LSHIFTRT:
8763 case ASHIFTRT:
8764 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8766 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8767 return true;
8769 else if (mode == SImode)
8771 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8772 /* Slightly disparage register shifts, but not by much. */
8773 if (!CONST_INT_P (XEXP (x, 1)))
8774 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8775 return true;
8778 /* Needs a libcall. */
8779 *total = COSTS_N_INSNS (2);
8780 return false;
8782 case MINUS:
8783 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8784 && (mode == SFmode || !TARGET_VFP_SINGLE))
8786 *total = COSTS_N_INSNS (1);
8787 return false;
8790 if (mode == SImode)
8792 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8793 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8795 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8796 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8797 || subcode1 == ROTATE || subcode1 == ROTATERT
8798 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8799 || subcode1 == ASHIFTRT)
8801 /* It's just the cost of the two operands. */
8802 *total = 0;
8803 return false;
8806 *total = COSTS_N_INSNS (1);
8807 return false;
8810 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8811 return false;
8813 case PLUS:
8814 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8815 && (mode == SFmode || !TARGET_VFP_SINGLE))
8817 *total = COSTS_N_INSNS (1);
8818 return false;
8821 /* A shift as a part of ADD costs nothing. */
8822 if (GET_CODE (XEXP (x, 0)) == MULT
8823 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8825 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8826 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8827 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8828 return true;
8831 /* Fall through */
8832 case AND: case XOR: case IOR:
8833 if (mode == SImode)
8835 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8837 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8838 || subcode == LSHIFTRT || subcode == ASHIFTRT
8839 || (code == AND && subcode == NOT))
8841 /* It's just the cost of the two operands. */
8842 *total = 0;
8843 return false;
8847 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8848 return false;
8850 case MULT:
8851 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8852 return false;
8854 case NEG:
8855 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8856 && (mode == SFmode || !TARGET_VFP_SINGLE))
8858 *total = COSTS_N_INSNS (1);
8859 return false;
8862 /* Fall through */
8863 case NOT:
8864 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8866 return false;
8868 case IF_THEN_ELSE:
8869 *total = 0;
8870 return false;
8872 case COMPARE:
8873 if (cc_register (XEXP (x, 0), VOIDmode))
8874 * total = 0;
8875 else
8876 *total = COSTS_N_INSNS (1);
8877 return false;
8879 case ABS:
8880 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8881 && (mode == SFmode || !TARGET_VFP_SINGLE))
8882 *total = COSTS_N_INSNS (1);
8883 else
8884 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8885 return false;
8887 case SIGN_EXTEND:
8888 case ZERO_EXTEND:
8889 return arm_rtx_costs_1 (x, outer_code, total, 0);
8891 case CONST_INT:
8892 if (const_ok_for_arm (INTVAL (x)))
8893 /* A multiplication by a constant requires another instruction
8894 to load the constant to a register. */
8895 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8896 ? 1 : 0);
8897 else if (const_ok_for_arm (~INTVAL (x)))
8898 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8899 else if (const_ok_for_arm (-INTVAL (x)))
8901 if (outer_code == COMPARE || outer_code == PLUS
8902 || outer_code == MINUS)
8903 *total = 0;
8904 else
8905 *total = COSTS_N_INSNS (1);
8907 else
8908 *total = COSTS_N_INSNS (2);
8909 return true;
8911 case CONST:
8912 case LABEL_REF:
8913 case SYMBOL_REF:
8914 *total = COSTS_N_INSNS (2);
8915 return true;
8917 case CONST_DOUBLE:
8918 *total = COSTS_N_INSNS (4);
8919 return true;
8921 case CONST_VECTOR:
8922 if (TARGET_NEON
8923 && TARGET_HARD_FLOAT
8924 && outer_code == SET
8925 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8926 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8927 *total = COSTS_N_INSNS (1);
8928 else
8929 *total = COSTS_N_INSNS (4);
8930 return true;
8932 case HIGH:
8933 case LO_SUM:
8934 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8935 cost of these slightly. */
8936 *total = COSTS_N_INSNS (1) + 1;
8937 return true;
8939 case SET:
8940 return false;
8942 default:
8943 if (mode != VOIDmode)
8944 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8945 else
8946 *total = COSTS_N_INSNS (4); /* Who knows? */
8947 return false;
8951 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8952 operand, then return the operand that is being shifted. If the shift
8953 is not by a constant, then set *SHIFT_REG to the shift-amount operand.
8954 Return NULL if OP is not a shifter operand. */
8955 static rtx
8956 shifter_op_p (rtx op, rtx *shift_reg)
8958 enum rtx_code code = GET_CODE (op);
8960 if (code == MULT && CONST_INT_P (XEXP (op, 1))
8961 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
8962 return XEXP (op, 0);
8963 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
8964 return XEXP (op, 0);
8965 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
8966 || code == ASHIFTRT)
8968 if (!CONST_INT_P (XEXP (op, 1)))
8969 *shift_reg = XEXP (op, 1);
8970 return XEXP (op, 0);
8973 return NULL;
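/* Editorial examples of the above: for (ashift (reg A) (const_int 2)) the
   function returns reg A and leaves *SHIFT_REG untouched; for
   (lshiftrt (reg A) (reg B)) it returns reg A and sets *SHIFT_REG to reg B;
   for (mult (reg A) (const_int 4)), a left shift in disguise, it returns
   reg A.  Anything else yields NULL.  */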
8976 static bool
8977 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
8979 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
8980 gcc_assert (GET_CODE (x) == UNSPEC);
8982 switch (XINT (x, 1))
8984 case UNSPEC_UNALIGNED_LOAD:
8985 /* We can only do unaligned loads into the integer unit, and we can't
8986 use LDM or LDRD. */
8987 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
8988 if (speed_p)
8989 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
8990 + extra_cost->ldst.load_unaligned);
8992 #ifdef NOT_YET
8993 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
8994 ADDR_SPACE_GENERIC, speed_p);
8995 #endif
8996 return true;
8998 case UNSPEC_UNALIGNED_STORE:
8999 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9000 if (speed_p)
9001 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9002 + extra_cost->ldst.store_unaligned);
9004 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9005 #ifdef NOT_YET
9006 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9007 ADDR_SPACE_GENERIC, speed_p);
9008 #endif
9009 return true;
9011 case UNSPEC_VRINTZ:
9012 case UNSPEC_VRINTP:
9013 case UNSPEC_VRINTM:
9014 case UNSPEC_VRINTR:
9015 case UNSPEC_VRINTX:
9016 case UNSPEC_VRINTA:
9017 *cost = COSTS_N_INSNS (1);
9018 if (speed_p)
9019 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9021 return true;
9022 default:
9023 *cost = COSTS_N_INSNS (2);
9024 break;
9026 return false;
9029 /* Cost of a libcall. We assume one insn per argument, an amount for the
9030 call (one insn for -Os) and then one for processing the result. */
9031 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
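/* For illustration: LIBCALL_COST (2) expands to COSTS_N_INSNS (20) when
   SPEED_P is true and to COSTS_N_INSNS (4) when optimizing for size,
   i.e. two argument insns plus the assumed call overhead.  */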
9033 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9034 do \
9036 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9037 if (shift_op != NULL \
9038 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9040 if (shift_reg) \
9042 if (speed_p) \
9043 *cost += extra_cost->alu.arith_shift_reg; \
9044 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9046 else if (speed_p) \
9047 *cost += extra_cost->alu.arith_shift; \
9049 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9050 + rtx_cost (XEXP (x, 1 - IDX), \
9051 OP, 1, speed_p)); \
9052 return true; \
9055 while (0);
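/* Editorial note: the macro above is invoked as, for example,
   HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) to cost an expression such as
   (plus:HI (ashift:HI (reg) (const_int 2)) (reg)).  IDX selects which
   operand of X is examined for a left-shift sub-operand, and the embedded
   "return true" exits the enclosing cost function when the pattern
   applies.  */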
9057 /* RTX costs. Make an estimate of the cost of executing the operation
9058 X, which is contained within an operation with code OUTER_CODE.
9059 SPEED_P indicates whether the cost desired is the performance cost,
9060 or the size cost. The estimate is stored in COST and the return
9061 value is TRUE if the cost calculation is final, or FALSE if the
9062 caller should recurse through the operands of X to add additional
9063 costs.
9065 We currently make no attempt to model the size savings of Thumb-2
9066 16-bit instructions. At the normal points in compilation where
9067 this code is called we have no measure of whether the condition
9068 flags are live or not, and thus no realistic way to determine what
9069 the size will eventually be. */
9070 static bool
9071 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9072 const struct cpu_cost_table *extra_cost,
9073 int *cost, bool speed_p)
9075 enum machine_mode mode = GET_MODE (x);
9077 if (TARGET_THUMB1)
9079 if (speed_p)
9080 *cost = thumb1_rtx_costs (x, code, outer_code);
9081 else
9082 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9083 return true;
9086 switch (code)
9088 case SET:
9089 *cost = 0;
9090 if (REG_P (SET_SRC (x))
9091 && REG_P (SET_DEST (x)))
9093 /* Assume that most copies can be done with a single insn,
9094 unless we don't have HW FP, in which case everything
9095 larger than word mode will require two insns. */
9096 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9097 && GET_MODE_SIZE (mode) > 4)
9098 || mode == DImode)
9099 ? 2 : 1);
9100 /* Conditional register moves can be encoded
9101 in 16 bits in Thumb mode. */
9102 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9103 *cost >>= 1;
9106 if (CONST_INT_P (SET_SRC (x)))
9108 /* Handle CONST_INT here, since the value doesn't have a mode
9109 and we would otherwise be unable to work out the true cost. */
9110 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9111 mode = GET_MODE (SET_DEST (x));
9112 outer_code = SET;
9113 /* Slightly lower the cost of setting a core reg to a constant.
9114 This helps break up chains and allows for better scheduling. */
9115 if (REG_P (SET_DEST (x))
9116 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9117 *cost -= 1;
9118 x = SET_SRC (x);
9119 /* Moves of an immediate in the range [0, 255] can be
9120 encoded in 16 bits in Thumb mode. */
9121 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9122 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9123 *cost >>= 1;
9124 goto const_int_cost;
9127 return false;
9129 case MEM:
9130 /* A memory access costs 1 insn if the mode is small or the address is
9131 a single register; otherwise it costs one insn per word. */
9132 if (REG_P (XEXP (x, 0)))
9133 *cost = COSTS_N_INSNS (1);
9134 else if (flag_pic
9135 && GET_CODE (XEXP (x, 0)) == PLUS
9136 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9137 /* This will be split into two instructions.
9138 See arm.md:calculate_pic_address. */
9139 *cost = COSTS_N_INSNS (2);
9140 else
9141 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9143 /* For speed optimizations, add the costs of the address and
9144 accessing memory. */
9145 if (speed_p)
9146 #ifdef NOT_YET
9147 *cost += (extra_cost->ldst.load
9148 + arm_address_cost (XEXP (x, 0), mode,
9149 ADDR_SPACE_GENERIC, speed_p));
9150 #else
9151 *cost += extra_cost->ldst.load;
9152 #endif
9153 return true;
9155 case PARALLEL:
9157 /* Calculations of LDM costs are complex. We assume an initial cost
9158 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
9159 registers; then each additional block of
9160 ldm_regs_per_insn_subsequent registers costs one more insn. The
9161 formula for N regs is thus:
9163 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9164 + ldm_regs_per_insn_subsequent - 1)
9165 / ldm_regs_per_insn_subsequent).
9167 Additional costs may also be added for addressing. A similar
9168 formula is used for STM. */
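/* Illustrative example (editorial; the per-insn register counts are
   hypothetical tuning values): with ldm_regs_per_insn_1st == 2,
   ldm_regs_per_insn_subsequent == 2 and N == 5 registers, the formula
   gives ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2).  */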
9170 bool is_ldm = load_multiple_operation (x, SImode);
9171 bool is_stm = store_multiple_operation (x, SImode);
9173 *cost = COSTS_N_INSNS (1);
9175 if (is_ldm || is_stm)
9177 if (speed_p)
9179 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9180 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9181 ? extra_cost->ldst.ldm_regs_per_insn_1st
9182 : extra_cost->ldst.stm_regs_per_insn_1st;
9183 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9184 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9185 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9187 *cost += regs_per_insn_1st
9188 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9189 + regs_per_insn_sub - 1)
9190 / regs_per_insn_sub);
9191 return true;
9195 return false;
9197 case DIV:
9198 case UDIV:
9199 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9200 && (mode == SFmode || !TARGET_VFP_SINGLE))
9201 *cost = COSTS_N_INSNS (speed_p
9202 ? extra_cost->fp[mode != SFmode].div : 1);
9203 else if (mode == SImode && TARGET_IDIV)
9204 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9205 else
9206 *cost = LIBCALL_COST (2);
9207 return false; /* All arguments must be in registers. */
9209 case MOD:
9210 case UMOD:
9211 *cost = LIBCALL_COST (2);
9212 return false; /* All arguments must be in registers. */
9214 case ROTATE:
9215 if (mode == SImode && REG_P (XEXP (x, 1)))
9217 *cost = (COSTS_N_INSNS (2)
9218 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9219 if (speed_p)
9220 *cost += extra_cost->alu.shift_reg;
9221 return true;
9223 /* Fall through */
9224 case ROTATERT:
9225 case ASHIFT:
9226 case LSHIFTRT:
9227 case ASHIFTRT:
9228 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9230 *cost = (COSTS_N_INSNS (3)
9231 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9232 if (speed_p)
9233 *cost += 2 * extra_cost->alu.shift;
9234 return true;
9236 else if (mode == SImode)
9238 *cost = (COSTS_N_INSNS (1)
9239 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9240 /* Slightly disparage register shifts at -Os, but not by much. */
9241 if (!CONST_INT_P (XEXP (x, 1)))
9242 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9243 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9244 return true;
9246 else if (GET_MODE_CLASS (mode) == MODE_INT
9247 && GET_MODE_SIZE (mode) < 4)
9249 if (code == ASHIFT)
9251 *cost = (COSTS_N_INSNS (1)
9252 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9253 /* Slightly disparage register shifts at -Os, but not by
9254 much. */
9255 if (!CONST_INT_P (XEXP (x, 1)))
9256 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9257 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9259 else if (code == LSHIFTRT || code == ASHIFTRT)
9261 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9263 /* Can use SBFX/UBFX. */
9264 *cost = COSTS_N_INSNS (1);
9265 if (speed_p)
9266 *cost += extra_cost->alu.bfx;
9267 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9269 else
9271 *cost = COSTS_N_INSNS (2);
9272 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9273 if (speed_p)
9275 if (CONST_INT_P (XEXP (x, 1)))
9276 *cost += 2 * extra_cost->alu.shift;
9277 else
9278 *cost += (extra_cost->alu.shift
9279 + extra_cost->alu.shift_reg);
9281 else
9282 /* Slightly disparage register shifts. */
9283 *cost += !CONST_INT_P (XEXP (x, 1));
9286 else /* Rotates. */
9288 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9289 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9290 if (speed_p)
9292 if (CONST_INT_P (XEXP (x, 1)))
9293 *cost += (2 * extra_cost->alu.shift
9294 + extra_cost->alu.log_shift);
9295 else
9296 *cost += (extra_cost->alu.shift
9297 + extra_cost->alu.shift_reg
9298 + extra_cost->alu.log_shift_reg);
9301 return true;
9304 *cost = LIBCALL_COST (2);
9305 return false;
9307 case MINUS:
9308 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9309 && (mode == SFmode || !TARGET_VFP_SINGLE))
9311 *cost = COSTS_N_INSNS (1);
9312 if (GET_CODE (XEXP (x, 0)) == MULT
9313 || GET_CODE (XEXP (x, 1)) == MULT)
9315 rtx mul_op0, mul_op1, sub_op;
9317 if (speed_p)
9318 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9320 if (GET_CODE (XEXP (x, 0)) == MULT)
9322 mul_op0 = XEXP (XEXP (x, 0), 0);
9323 mul_op1 = XEXP (XEXP (x, 0), 1);
9324 sub_op = XEXP (x, 1);
9326 else
9328 mul_op0 = XEXP (XEXP (x, 1), 0);
9329 mul_op1 = XEXP (XEXP (x, 1), 1);
9330 sub_op = XEXP (x, 0);
9333 /* The first operand of the multiply may be optionally
9334 negated. */
9335 if (GET_CODE (mul_op0) == NEG)
9336 mul_op0 = XEXP (mul_op0, 0);
9338 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9339 + rtx_cost (mul_op1, code, 0, speed_p)
9340 + rtx_cost (sub_op, code, 0, speed_p));
9342 return true;
9345 if (speed_p)
9346 *cost += extra_cost->fp[mode != SFmode].addsub;
9347 return false;
9350 if (mode == SImode)
9352 rtx shift_by_reg = NULL;
9353 rtx shift_op;
9354 rtx non_shift_op;
9356 *cost = COSTS_N_INSNS (1);
9358 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9359 if (shift_op == NULL)
9361 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9362 non_shift_op = XEXP (x, 0);
9364 else
9365 non_shift_op = XEXP (x, 1);
9367 if (shift_op != NULL)
9369 if (shift_by_reg != NULL)
9371 if (speed_p)
9372 *cost += extra_cost->alu.arith_shift_reg;
9373 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9375 else if (speed_p)
9376 *cost += extra_cost->alu.arith_shift;
9378 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9379 + rtx_cost (non_shift_op, code, 0, speed_p));
9380 return true;
9383 if (arm_arch_thumb2
9384 && GET_CODE (XEXP (x, 1)) == MULT)
9386 /* MLS. */
9387 if (speed_p)
9388 *cost += extra_cost->mult[0].add;
9389 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9390 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9391 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9392 return true;
9395 if (CONST_INT_P (XEXP (x, 0)))
9397 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9398 INTVAL (XEXP (x, 0)), NULL_RTX,
9399 NULL_RTX, 1, 0);
9400 *cost = COSTS_N_INSNS (insns);
9401 if (speed_p)
9402 *cost += insns * extra_cost->alu.arith;
9403 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9404 return true;
9407 return false;
9410 if (GET_MODE_CLASS (mode) == MODE_INT
9411 && GET_MODE_SIZE (mode) < 4)
9413 rtx shift_op, shift_reg;
9414 shift_reg = NULL;
9416 /* We check both sides of the MINUS for shifter operands since,
9417 unlike PLUS, it's not commutative. */
9419 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9420 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9422 /* Slightly disparage, as we might need to widen the result. */
9423 *cost = 1 + COSTS_N_INSNS (1);
9424 if (speed_p)
9425 *cost += extra_cost->alu.arith;
9427 if (CONST_INT_P (XEXP (x, 0)))
9429 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9430 return true;
9433 return false;
9436 if (mode == DImode)
9438 *cost = COSTS_N_INSNS (2);
9440 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9442 rtx op1 = XEXP (x, 1);
9444 if (speed_p)
9445 *cost += 2 * extra_cost->alu.arith;
9447 if (GET_CODE (op1) == ZERO_EXTEND)
9448 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9449 else
9450 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9451 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9452 0, speed_p);
9453 return true;
9455 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9457 if (speed_p)
9458 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9459 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9460 0, speed_p)
9461 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9462 return true;
9464 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9465 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9467 if (speed_p)
9468 *cost += (extra_cost->alu.arith
9469 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9470 ? extra_cost->alu.arith
9471 : extra_cost->alu.arith_shift));
9472 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9473 + rtx_cost (XEXP (XEXP (x, 1), 0),
9474 GET_CODE (XEXP (x, 1)), 0, speed_p));
9475 return true;
9478 if (speed_p)
9479 *cost += 2 * extra_cost->alu.arith;
9480 return false;
9483 /* Vector mode? */
9485 *cost = LIBCALL_COST (2);
9486 return false;
9488 case PLUS:
9489 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9490 && (mode == SFmode || !TARGET_VFP_SINGLE))
9492 *cost = COSTS_N_INSNS (1);
9493 if (GET_CODE (XEXP (x, 0)) == MULT)
9495 rtx mul_op0, mul_op1, add_op;
9497 if (speed_p)
9498 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9500 mul_op0 = XEXP (XEXP (x, 0), 0);
9501 mul_op1 = XEXP (XEXP (x, 0), 1);
9502 add_op = XEXP (x, 1);
9504 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9505 + rtx_cost (mul_op1, code, 0, speed_p)
9506 + rtx_cost (add_op, code, 0, speed_p));
9508 return true;
9511 if (speed_p)
9512 *cost += extra_cost->fp[mode != SFmode].addsub;
9513 return false;
9515 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9517 *cost = LIBCALL_COST (2);
9518 return false;
9521 /* Narrow modes can be synthesized in SImode, but the range
9522 of useful sub-operations is limited. Check for shift operations
9523 on one of the operands. Only left shifts can be used in the
9524 narrow modes. */
9525 if (GET_MODE_CLASS (mode) == MODE_INT
9526 && GET_MODE_SIZE (mode) < 4)
9528 rtx shift_op, shift_reg;
9529 shift_reg = NULL;
9531 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9533 if (CONST_INT_P (XEXP (x, 1)))
9535 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9536 INTVAL (XEXP (x, 1)), NULL_RTX,
9537 NULL_RTX, 1, 0);
9538 *cost = COSTS_N_INSNS (insns);
9539 if (speed_p)
9540 *cost += insns * extra_cost->alu.arith;
9541 /* Slightly penalize a narrow operation as the result may
9542 need widening. */
9543 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9544 return true;
9547 /* Slightly penalize a narrow operation as the result may
9548 need widening. */
9549 *cost = 1 + COSTS_N_INSNS (1);
9550 if (speed_p)
9551 *cost += extra_cost->alu.arith;
9553 return false;
9556 if (mode == SImode)
9558 rtx shift_op, shift_reg;
9560 *cost = COSTS_N_INSNS (1);
9561 if (TARGET_INT_SIMD
9562 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9563 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9565 /* UXTA[BH] or SXTA[BH]. */
9566 if (speed_p)
9567 *cost += extra_cost->alu.extnd_arith;
9568 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9569 speed_p)
9570 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9571 return true;
9574 shift_reg = NULL;
9575 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9576 if (shift_op != NULL)
9578 if (shift_reg)
9580 if (speed_p)
9581 *cost += extra_cost->alu.arith_shift_reg;
9582 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9584 else if (speed_p)
9585 *cost += extra_cost->alu.arith_shift;
9587 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9588 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9589 return true;
9591 if (GET_CODE (XEXP (x, 0)) == MULT)
9593 rtx mul_op = XEXP (x, 0);
9595 *cost = COSTS_N_INSNS (1);
9597 if (TARGET_DSP_MULTIPLY
9598 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9599 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9600 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9601 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9602 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9603 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9604 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9605 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9606 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9607 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9608 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9609 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9610 == 16))))))
9612 /* SMLA[BT][BT]. */
9613 if (speed_p)
9614 *cost += extra_cost->mult[0].extend_add;
9615 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9616 SIGN_EXTEND, 0, speed_p)
9617 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9618 SIGN_EXTEND, 0, speed_p)
9619 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9620 return true;
9623 if (speed_p)
9624 *cost += extra_cost->mult[0].add;
9625 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9626 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9627 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9628 return true;
9630 if (CONST_INT_P (XEXP (x, 1)))
9632 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9633 INTVAL (XEXP (x, 1)), NULL_RTX,
9634 NULL_RTX, 1, 0);
9635 *cost = COSTS_N_INSNS (insns);
9636 if (speed_p)
9637 *cost += insns * extra_cost->alu.arith;
9638 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9639 return true;
9641 return false;
9644 if (mode == DImode)
9646 if (arm_arch3m
9647 && GET_CODE (XEXP (x, 0)) == MULT
9648 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9649 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9650 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9651 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9653 *cost = COSTS_N_INSNS (1);
9654 if (speed_p)
9655 *cost += extra_cost->mult[1].extend_add;
9656 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9657 ZERO_EXTEND, 0, speed_p)
9658 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9659 ZERO_EXTEND, 0, speed_p)
9660 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9661 return true;
9664 *cost = COSTS_N_INSNS (2);
9666 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9667 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9669 if (speed_p)
9670 *cost += (extra_cost->alu.arith
9671 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9672 ? extra_cost->alu.arith
9673 : extra_cost->alu.arith_shift));
9675 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9676 speed_p)
9677 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9678 return true;
9681 if (speed_p)
9682 *cost += 2 * extra_cost->alu.arith;
9683 return false;
9686 /* Vector mode? */
9687 *cost = LIBCALL_COST (2);
9688 return false;
9690 case AND: case XOR: case IOR:
9691 if (mode == SImode)
9693 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9694 rtx op0 = XEXP (x, 0);
9695 rtx shift_op, shift_reg;
9697 *cost = COSTS_N_INSNS (1);
9699 if (subcode == NOT
9700 && (code == AND
9701 || (code == IOR && TARGET_THUMB2)))
9702 op0 = XEXP (op0, 0);
9704 shift_reg = NULL;
9705 shift_op = shifter_op_p (op0, &shift_reg);
9706 if (shift_op != NULL)
9708 if (shift_reg)
9710 if (speed_p)
9711 *cost += extra_cost->alu.log_shift_reg;
9712 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9714 else if (speed_p)
9715 *cost += extra_cost->alu.log_shift;
9717 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9718 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9719 return true;
9722 if (CONST_INT_P (XEXP (x, 1)))
9724 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9725 INTVAL (XEXP (x, 1)), NULL_RTX,
9726 NULL_RTX, 1, 0);
9728 *cost = COSTS_N_INSNS (insns);
9729 if (speed_p)
9730 *cost += insns * extra_cost->alu.logical;
9731 *cost += rtx_cost (op0, code, 0, speed_p);
9732 return true;
9735 if (speed_p)
9736 *cost += extra_cost->alu.logical;
9737 *cost += (rtx_cost (op0, code, 0, speed_p)
9738 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9739 return true;
9742 if (mode == DImode)
9744 rtx op0 = XEXP (x, 0);
9745 enum rtx_code subcode = GET_CODE (op0);
9747 *cost = COSTS_N_INSNS (2);
9749 if (subcode == NOT
9750 && (code == AND
9751 || (code == IOR && TARGET_THUMB2)))
9752 op0 = XEXP (op0, 0);
9754 if (GET_CODE (op0) == ZERO_EXTEND)
9756 if (speed_p)
9757 *cost += 2 * extra_cost->alu.logical;
9759 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9760 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9761 return true;
9763 else if (GET_CODE (op0) == SIGN_EXTEND)
9765 if (speed_p)
9766 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9768 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9769 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9770 return true;
9773 if (speed_p)
9774 *cost += 2 * extra_cost->alu.logical;
9776 return true;
9778 /* Vector mode? */
9780 *cost = LIBCALL_COST (2);
9781 return false;
9783 case MULT:
9784 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9785 && (mode == SFmode || !TARGET_VFP_SINGLE))
9787 rtx op0 = XEXP (x, 0);
9789 *cost = COSTS_N_INSNS (1);
9791 if (GET_CODE (op0) == NEG)
9792 op0 = XEXP (op0, 0);
9794 if (speed_p)
9795 *cost += extra_cost->fp[mode != SFmode].mult;
9797 *cost += (rtx_cost (op0, MULT, 0, speed_p)
9798 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
9799 return true;
9801 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9803 *cost = LIBCALL_COST (2);
9804 return false;
9807 if (mode == SImode)
9809 *cost = COSTS_N_INSNS (1);
9810 if (TARGET_DSP_MULTIPLY
9811 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9812 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9813 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9814 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9815 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9816 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9817 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9818 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9819 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9820 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9821 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9822 && (INTVAL (XEXP (XEXP (x, 1), 1))
9823 == 16))))))
9825 /* SMUL[TB][TB]. */
9826 if (speed_p)
9827 *cost += extra_cost->mult[0].extend;
9828 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
9829 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
9830 return true;
9832 if (speed_p)
9833 *cost += extra_cost->mult[0].simple;
9834 return false;
9837 if (mode == DImode)
9839 if (arm_arch3m
9840 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9841 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9842 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9843 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9845 *cost = COSTS_N_INSNS (1);
9846 if (speed_p)
9847 *cost += extra_cost->mult[1].extend;
9848 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
9849 ZERO_EXTEND, 0, speed_p)
9850 + rtx_cost (XEXP (XEXP (x, 1), 0),
9851 ZERO_EXTEND, 0, speed_p));
9852 return true;
9855 *cost = LIBCALL_COST (2);
9856 return false;
9859 /* Vector mode? */
9860 *cost = LIBCALL_COST (2);
9861 return false;
9863 case NEG:
9864 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9865 && (mode == SFmode || !TARGET_VFP_SINGLE))
9867 *cost = COSTS_N_INSNS (1);
9868 if (speed_p)
9869 *cost += extra_cost->fp[mode != SFmode].neg;
9871 return false;
9873 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9875 *cost = LIBCALL_COST (1);
9876 return false;
9879 if (mode == SImode)
9881 if (GET_CODE (XEXP (x, 0)) == ABS)
9883 *cost = COSTS_N_INSNS (2);
9884 /* Assume the non-flag-changing variant. */
9885 if (speed_p)
9886 *cost += (extra_cost->alu.log_shift
9887 + extra_cost->alu.arith_shift);
9888 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
9889 return true;
9892 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
9893 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
9895 *cost = COSTS_N_INSNS (2);
9896 /* No extra cost for MOV imm and MVN imm. */
9897 /* If the comparison op is using the flags, there's no further
9898 cost; otherwise we need to add the cost of the comparison. */
9899 if (!(REG_P (XEXP (XEXP (x, 0), 0))
9900 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
9901 && XEXP (XEXP (x, 0), 1) == const0_rtx))
9903 *cost += (COSTS_N_INSNS (1)
9904 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
9905 speed_p)
9906 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
9907 speed_p));
9908 if (speed_p)
9909 *cost += extra_cost->alu.arith;
9911 return true;
9913 *cost = COSTS_N_INSNS (1);
9914 if (speed_p)
9915 *cost += extra_cost->alu.arith;
9916 return false;
9919 if (GET_MODE_CLASS (mode) == MODE_INT
9920 && GET_MODE_SIZE (mode) < 4)
9922 /* Slightly disparage, as we might need an extend operation. */
9923 *cost = 1 + COSTS_N_INSNS (1);
9924 if (speed_p)
9925 *cost += extra_cost->alu.arith;
9926 return false;
9929 if (mode == DImode)
9931 *cost = COSTS_N_INSNS (2);
9932 if (speed_p)
9933 *cost += 2 * extra_cost->alu.arith;
9934 return false;
9937 /* Vector mode? */
9938 *cost = LIBCALL_COST (1);
9939 return false;
9941 case NOT:
9942 if (mode == SImode)
9944 rtx shift_op;
9945 rtx shift_reg = NULL;
9947 *cost = COSTS_N_INSNS (1);
9948 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9950 if (shift_op)
9952 if (shift_reg != NULL)
9954 if (speed_p)
9955 *cost += extra_cost->alu.log_shift_reg;
9956 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9958 else if (speed_p)
9959 *cost += extra_cost->alu.log_shift;
9960 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
9961 return true;
9964 if (speed_p)
9965 *cost += extra_cost->alu.logical;
9966 return false;
9968 if (mode == DImode)
9970 *cost = COSTS_N_INSNS (2);
9971 return false;
9974 /* Vector mode? */
9976 *cost += LIBCALL_COST (1);
9977 return false;
9979 case IF_THEN_ELSE:
9981 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9983 *cost = COSTS_N_INSNS (4);
9984 return true;
9986 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
9987 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
9989 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
9990 /* Assume that if one arm of the if_then_else is a register,
9991 it will be tied with the result, eliminating the
9992 conditional insn. */
9993 if (REG_P (XEXP (x, 1)))
9994 *cost += op2cost;
9995 else if (REG_P (XEXP (x, 2)))
9996 *cost += op1cost;
9997 else
9999 if (speed_p)
10001 if (extra_cost->alu.non_exec_costs_exec)
10002 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10003 else
10004 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10006 else
10007 *cost += op1cost + op2cost;
10010 return true;
10012 case COMPARE:
10013 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10014 *cost = 0;
10015 else
10017 enum machine_mode op0mode;
10018 /* We'll mostly assume that the cost of a compare is the cost of the
10019 LHS. However, there are some notable exceptions. */
10021 /* Floating point compares are never done as side-effects. */
10022 op0mode = GET_MODE (XEXP (x, 0));
10023 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10024 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10026 *cost = COSTS_N_INSNS (1);
10027 if (speed_p)
10028 *cost += extra_cost->fp[op0mode != SFmode].compare;
10030 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10032 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10033 return true;
10036 return false;
10038 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10040 *cost = LIBCALL_COST (2);
10041 return false;
10044 /* DImode compares normally take two insns. */
10045 if (op0mode == DImode)
10047 *cost = COSTS_N_INSNS (2);
10048 if (speed_p)
10049 *cost += 2 * extra_cost->alu.arith;
10050 return false;
10053 if (op0mode == SImode)
10055 rtx shift_op;
10056 rtx shift_reg;
10058 if (XEXP (x, 1) == const0_rtx
10059 && !(REG_P (XEXP (x, 0))
10060 || (GET_CODE (XEXP (x, 0)) == SUBREG
10061 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10063 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10065 /* Multiply operations that set the flags are often
10066 significantly more expensive. */
10067 if (speed_p
10068 && GET_CODE (XEXP (x, 0)) == MULT
10069 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10070 *cost += extra_cost->mult[0].flag_setting;
10072 if (speed_p
10073 && GET_CODE (XEXP (x, 0)) == PLUS
10074 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10075 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10076 0), 1), mode))
10077 *cost += extra_cost->mult[0].flag_setting;
10078 return true;
10081 shift_reg = NULL;
10082 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10083 if (shift_op != NULL)
10085 *cost = COSTS_N_INSNS (1);
10086 if (shift_reg != NULL)
10088 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10089 if (speed_p)
10090 *cost += extra_cost->alu.arith_shift_reg;
10092 else if (speed_p)
10093 *cost += extra_cost->alu.arith_shift;
10094 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10095 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10096 return true;
10099 *cost = COSTS_N_INSNS (1);
10100 if (speed_p)
10101 *cost += extra_cost->alu.arith;
10102 if (CONST_INT_P (XEXP (x, 1))
10103 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10105 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10106 return true;
10108 return false;
10111 /* Vector mode? */
10113 *cost = LIBCALL_COST (2);
10114 return false;
10116 return true;
10118 case EQ:
10119 case NE:
10120 case LT:
10121 case LE:
10122 case GT:
10123 case GE:
10124 case LTU:
10125 case LEU:
10126 case GEU:
10127 case GTU:
10128 case ORDERED:
10129 case UNORDERED:
10130 case UNEQ:
10131 case UNLE:
10132 case UNLT:
10133 case UNGE:
10134 case UNGT:
10135 case LTGT:
10136 if (outer_code == SET)
10138 /* Is it a store-flag operation? */
10139 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10140 && XEXP (x, 1) == const0_rtx)
10142 /* Thumb also needs an IT insn. */
10143 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10144 return true;
10146 if (XEXP (x, 1) == const0_rtx)
10148 switch (code)
10150 case LT:
10151 /* LSR Rd, Rn, #31. */
10152 *cost = COSTS_N_INSNS (1);
10153 if (speed_p)
10154 *cost += extra_cost->alu.shift;
10155 break;
10157 case EQ:
10158 /* RSBS T1, Rn, #0
10159 ADC Rd, Rn, T1. */
10161 case NE:
10162 /* SUBS T1, Rn, #1
10163 SBC Rd, Rn, T1. */
10164 *cost = COSTS_N_INSNS (2);
10165 break;
10167 case LE:
10168 /* RSBS T1, Rn, Rn, LSR #31
10169 ADC Rd, Rn, T1. */
10170 *cost = COSTS_N_INSNS (2);
10171 if (speed_p)
10172 *cost += extra_cost->alu.arith_shift;
10173 break;
10175 case GT:
10176 /* RSB Rd, Rn, Rn, ASR #1
10177 LSR Rd, Rd, #31. */
10178 *cost = COSTS_N_INSNS (2);
10179 if (speed_p)
10180 *cost += (extra_cost->alu.arith_shift
10181 + extra_cost->alu.shift);
10182 break;
10184 case GE:
10185 /* ASR Rd, Rn, #31
10186 ADD Rd, Rn, #1. */
10187 *cost = COSTS_N_INSNS (2);
10188 if (speed_p)
10189 *cost += extra_cost->alu.shift;
10190 break;
10192 default:
10193 /* Remaining cases are either meaningless or would take
10194 three insns anyway. */
10195 *cost = COSTS_N_INSNS (3);
10196 break;
10198 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10199 return true;
10201 else
10203 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10204 if (CONST_INT_P (XEXP (x, 1))
10205 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10207 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10208 return true;
10211 return false;
10214 /* Not directly inside a set. If it involves the condition code
10215 register, it must be the condition for a branch, cond_exec or
10216 I_T_E operation. Since the comparison is performed elsewhere,
10217 this is just the control part, which has no additional
10218 cost. */
10219 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10220 && XEXP (x, 1) == const0_rtx)
10222 *cost = 0;
10223 return true;
10225 return false;
10227 case ABS:
10228 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10229 && (mode == SFmode || !TARGET_VFP_SINGLE))
10231 *cost = COSTS_N_INSNS (1);
10232 if (speed_p)
10233 *cost += extra_cost->fp[mode != SFmode].neg;
10235 return false;
10237 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10239 *cost = LIBCALL_COST (1);
10240 return false;
10243 if (mode == SImode)
10245 *cost = COSTS_N_INSNS (1);
10246 if (speed_p)
10247 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10248 return false;
10250 /* Vector mode? */
10251 *cost = LIBCALL_COST (1);
10252 return false;
10254 case SIGN_EXTEND:
10255 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10256 && MEM_P (XEXP (x, 0)))
10258 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10260 if (mode == DImode)
10261 *cost += COSTS_N_INSNS (1);
10263 if (!speed_p)
10264 return true;
10266 if (GET_MODE (XEXP (x, 0)) == SImode)
10267 *cost += extra_cost->ldst.load;
10268 else
10269 *cost += extra_cost->ldst.load_sign_extend;
10271 if (mode == DImode)
10272 *cost += extra_cost->alu.shift;
10274 return true;
10277 /* Widening from less than 32-bits requires an extend operation. */
10278 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10280 /* We have SXTB/SXTH. */
10281 *cost = COSTS_N_INSNS (1);
10282 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10283 if (speed_p)
10284 *cost += extra_cost->alu.extnd;
10286 else if (GET_MODE (XEXP (x, 0)) != SImode)
10288 /* Needs two shifts. */
10289 *cost = COSTS_N_INSNS (2);
10290 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10291 if (speed_p)
10292 *cost += 2 * extra_cost->alu.shift;
10295 /* Widening beyond 32-bits requires one more insn. */
10296 if (mode == DImode)
10298 *cost += COSTS_N_INSNS (1);
10299 if (speed_p)
10300 *cost += extra_cost->alu.shift;
10303 return true;
10305 case ZERO_EXTEND:
10306 if ((arm_arch4
10307 || GET_MODE (XEXP (x, 0)) == SImode
10308 || GET_MODE (XEXP (x, 0)) == QImode)
10309 && MEM_P (XEXP (x, 0)))
10311 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10313 if (mode == DImode)
10314 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10316 return true;
10319 /* Widening from less than 32-bits requires an extend operation. */
10320 if (GET_MODE (XEXP (x, 0)) == QImode)
10322 /* UXTB can be a shorter instruction in Thumb2, but it might
10323 be slower than the AND Rd, Rn, #255 alternative. When
10324 optimizing for speed it should never be slower to use
10325 AND, and we don't really model 16-bit vs 32-bit insns
10326 here. */
10327 *cost = COSTS_N_INSNS (1);
10328 if (speed_p)
10329 *cost += extra_cost->alu.logical;
10331 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10333 /* We have UXTB/UXTH. */
10334 *cost = COSTS_N_INSNS (1);
10335 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10336 if (speed_p)
10337 *cost += extra_cost->alu.extnd;
10339 else if (GET_MODE (XEXP (x, 0)) != SImode)
10341 /* Needs two shifts. It's marginally preferable to use
10342 shifts rather than two BIC instructions as the second
10343 shift may merge with a subsequent insn as a shifter
10344 op. */
10345 *cost = COSTS_N_INSNS (2);
10346 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10347 if (speed_p)
10348 *cost += 2 * extra_cost->alu.shift;
10350 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10351 *cost = COSTS_N_INSNS (1);
10353 /* Widening beyond 32-bits requires one more insn. */
10354 if (mode == DImode)
10356 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10359 return true;
10361 case CONST_INT:
10362 *cost = 0;
10363 /* CONST_INT has no mode, so we cannot tell for sure how many
10364 insns are really going to be needed. The best we can do is
10365 look at the value passed. If it fits in SImode, then assume
10366 that's the mode it will be used for. Otherwise assume it
10367 will be used in DImode. */
10368 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10369 mode = SImode;
10370 else
10371 mode = DImode;
10373 /* Avoid blowing up in arm_gen_constant (). */
10374 if (!(outer_code == PLUS
10375 || outer_code == AND
10376 || outer_code == IOR
10377 || outer_code == XOR
10378 || outer_code == MINUS))
10379 outer_code = SET;
10381 const_int_cost:
10382 if (mode == SImode)
10384 *cost += 0;
10385 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10386 INTVAL (x), NULL, NULL,
10387 0, 0));
10388 /* Extra costs? */
10390 else
10392 *cost += COSTS_N_INSNS (arm_gen_constant
10393 (outer_code, SImode, NULL,
10394 trunc_int_for_mode (INTVAL (x), SImode),
10395 NULL, NULL, 0, 0)
10396 + arm_gen_constant (outer_code, SImode, NULL,
10397 INTVAL (x) >> 32, NULL,
10398 NULL, 0, 0));
10399 /* Extra costs? */
10402 return true;
10404 case CONST:
10405 case LABEL_REF:
10406 case SYMBOL_REF:
10407 if (speed_p)
10409 if (arm_arch_thumb2 && !flag_pic)
10410 *cost = COSTS_N_INSNS (2);
10411 else
10412 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10414 else
10415 *cost = COSTS_N_INSNS (2);
10417 if (flag_pic)
10419 *cost += COSTS_N_INSNS (1);
10420 if (speed_p)
10421 *cost += extra_cost->alu.arith;
10424 return true;
10426 case CONST_FIXED:
10427 *cost = COSTS_N_INSNS (4);
10428 /* Fixme. */
10429 return true;
10431 case CONST_DOUBLE:
10432 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10433 && (mode == SFmode || !TARGET_VFP_SINGLE))
10435 if (vfp3_const_double_rtx (x))
10437 *cost = COSTS_N_INSNS (1);
10438 if (speed_p)
10439 *cost += extra_cost->fp[mode == DFmode].fpconst;
10440 return true;
10443 if (speed_p)
10445 *cost = COSTS_N_INSNS (1);
10446 if (mode == DFmode)
10447 *cost += extra_cost->ldst.loadd;
10448 else
10449 *cost += extra_cost->ldst.loadf;
10451 else
10452 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10454 return true;
10456 *cost = COSTS_N_INSNS (4);
10457 return true;
10459 case CONST_VECTOR:
10460 /* Fixme. */
10461 if (TARGET_NEON
10462 && TARGET_HARD_FLOAT
10463 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10464 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10465 *cost = COSTS_N_INSNS (1);
10466 else
10467 *cost = COSTS_N_INSNS (4);
10468 return true;
10470 case HIGH:
10471 case LO_SUM:
10472 *cost = COSTS_N_INSNS (1);
10473 /* When optimizing for size, we prefer constant pool entries to
10474 MOVW/MOVT pairs, so bump the cost of these slightly. */
10475 if (!speed_p)
10476 *cost += 1;
10477 return true;
10479 case CLZ:
10480 *cost = COSTS_N_INSNS (1);
10481 if (speed_p)
10482 *cost += extra_cost->alu.clz;
10483 return false;
10485 case SMIN:
10486 if (XEXP (x, 1) == const0_rtx)
10488 *cost = COSTS_N_INSNS (1);
10489 if (speed_p)
10490 *cost += extra_cost->alu.log_shift;
10491 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10492 return true;
10494 /* Fall through. */
10495 case SMAX:
10496 case UMIN:
10497 case UMAX:
10498 *cost = COSTS_N_INSNS (2);
10499 return false;
10501 case TRUNCATE:
10502 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10503 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10504 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10505 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10506 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10507 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10508 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10509 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10510 == ZERO_EXTEND))))
10512 *cost = COSTS_N_INSNS (1);
10513 if (speed_p)
10514 *cost += extra_cost->mult[1].extend;
10515 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10516 speed_p)
10517 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10518 0, speed_p));
10519 return true;
10521 *cost = LIBCALL_COST (1);
10522 return false;
10524 case UNSPEC:
10525 return arm_unspec_cost (x, outer_code, speed_p, cost);
10527 case PC:
10528 /* Reading the PC is like reading any other register. Writing it
10529 is more expensive, but we take that into account elsewhere. */
10530 *cost = 0;
10531 return true;
10533 case ZERO_EXTRACT:
10534 /* TODO: Simple zero_extract of bottom bits using AND. */
10535 /* Fall through. */
10536 case SIGN_EXTRACT:
10537 if (arm_arch6
10538 && mode == SImode
10539 && CONST_INT_P (XEXP (x, 1))
10540 && CONST_INT_P (XEXP (x, 2)))
10542 *cost = COSTS_N_INSNS (1);
10543 if (speed_p)
10544 *cost += extra_cost->alu.bfx;
10545 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10546 return true;
10548 /* Without UBFX/SBFX, we need to resort to shift operations. */
10549 *cost = COSTS_N_INSNS (2);
10550 if (speed_p)
10551 *cost += 2 * extra_cost->alu.shift;
10552 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10553 return true;
10555 case FLOAT_EXTEND:
10556 if (TARGET_HARD_FLOAT)
10558 *cost = COSTS_N_INSNS (1);
10559 if (speed_p)
10560 *cost += extra_cost->fp[mode == DFmode].widen;
10561 if (!TARGET_FPU_ARMV8
10562 && GET_MODE (XEXP (x, 0)) == HFmode)
10564 /* Pre v8, widening HF->DF is a two-step process, first
10565 widening to SFmode. */
10566 *cost += COSTS_N_INSNS (1);
10567 if (speed_p)
10568 *cost += extra_cost->fp[0].widen;
10570 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10571 return true;
10574 *cost = LIBCALL_COST (1);
10575 return false;
10577 case FLOAT_TRUNCATE:
10578 if (TARGET_HARD_FLOAT)
10580 *cost = COSTS_N_INSNS (1);
10581 if (speed_p)
10582 *cost += extra_cost->fp[mode == DFmode].narrow;
10583 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10584 return true;
10585 /* Vector modes? */
10587 *cost = LIBCALL_COST (1);
10588 return false;
10590 case FIX:
10591 case UNSIGNED_FIX:
10592 if (TARGET_HARD_FLOAT)
10594 if (GET_MODE_CLASS (mode) == MODE_INT)
10596 *cost = COSTS_N_INSNS (1);
10597 if (speed_p)
10598 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10599 /* Strip off the 'cost' of rounding towards zero. */
10600 if (GET_CODE (XEXP (x, 0)) == FIX)
10601 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10602 else
10603 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10604 /* ??? Increase the cost to deal with transferring from
10605 FP -> CORE registers? */
10606 return true;
10608 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10609 && TARGET_FPU_ARMV8)
10611 *cost = COSTS_N_INSNS (1);
10612 if (speed_p)
10613 *cost += extra_cost->fp[mode == DFmode].roundint;
10614 return false;
10616 /* Vector costs? */
10618 *cost = LIBCALL_COST (1);
10619 return false;
10621 case FLOAT:
10622 case UNSIGNED_FLOAT:
10623 if (TARGET_HARD_FLOAT)
10625 /* ??? Increase the cost to deal with transferring from CORE
10626 -> FP registers? */
10627 *cost = COSTS_N_INSNS (1);
10628 if (speed_p)
10629 *cost += extra_cost->fp[mode == DFmode].fromint;
10630 return false;
10632 *cost = LIBCALL_COST (1);
10633 return false;
10635 case CALL:
10636 *cost = COSTS_N_INSNS (1);
10637 return true;
10639 case ASM_OPERANDS:
10640 /* Just a guess. Cost one insn per input. */
10641 *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));
10642 return true;
10644 default:
10645 if (mode != VOIDmode)
10646 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10647 else
10648 *cost = COSTS_N_INSNS (4); /* Who knows? */
10649 return false;
10653 #undef HANDLE_NARROW_SHIFT_ARITH
10655 /* RTX costs when optimizing for size. */
10656 static bool
10657 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10658 int *total, bool speed)
10660 bool result;
10662 if (TARGET_OLD_RTX_COSTS
10663 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10665 /* Old way. (Deprecated.) */
10666 if (!speed)
10667 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10668 (enum rtx_code) outer_code, total);
10669 else
10670 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10671 (enum rtx_code) outer_code, total,
10672 speed);
10674 else
10676 /* New way. */
10677 if (current_tune->insn_extra_cost)
10678 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10679 (enum rtx_code) outer_code,
10680 current_tune->insn_extra_cost,
10681 total, speed);
10682 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10683 && current_tune->insn_extra_cost == NULL */
10684 else
10685 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10686 (enum rtx_code) outer_code,
10687 &generic_extra_costs, total, speed);
10690 if (dump_file && (dump_flags & TDF_DETAILS))
10692 print_rtl_single (dump_file, x);
10693 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10694 *total, result ? "final" : "partial");
10696 return result;
10699 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10700 supported on any "slowmul" cores, so it can be ignored. */
10702 static bool
10703 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10704 int *total, bool speed)
10706 enum machine_mode mode = GET_MODE (x);
10708 if (TARGET_THUMB)
10710 *total = thumb1_rtx_costs (x, code, outer_code);
10711 return true;
10714 switch (code)
10716 case MULT:
10717 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10718 || mode == DImode)
10720 *total = COSTS_N_INSNS (20);
10721 return false;
10724 if (CONST_INT_P (XEXP (x, 1)))
10726 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10727 & (unsigned HOST_WIDE_INT) 0xffffffff);
10728 int cost, const_ok = const_ok_for_arm (i);
10729 int j, booth_unit_size;
10731 /* Tune as appropriate. */
10732 cost = const_ok ? 4 : 8;
10733 booth_unit_size = 2;
10734 for (j = 0; i && j < 32; j += booth_unit_size)
10736 i >>= booth_unit_size;
10737 cost++;
10740 *total = COSTS_N_INSNS (cost);
10741 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
10742 return true;
10745 *total = COSTS_N_INSNS (20);
10746 return false;
10748 default:
10749 return arm_rtx_costs_1 (x, outer_code, total, speed);
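/* Illustrative sketch (not part of arm.c): a standalone model of the
   Booth-unit loop in arm_slowmul_rtx_costs above, assuming a multiplier
   that retires 2 bits per cycle.  The helper name below is invented for
   illustration only.  The estimate starts from a base of 4 (constant is
   ARM-encodable) or 8 (constant must be synthesized) and adds one unit
   per 2-bit chunk needed before the remaining multiplier value reaches
   zero; arm.c then scales the result with COSTS_N_INSNS.  */
#include <stdio.h>

static int
slowmul_const_mult_cost (unsigned long long i, int const_ok)
{
  int cost = const_ok ? 4 : 8;   /* Base cost, as in the code above.  */
  int booth_unit_size = 2;       /* "slowmul" cores retire 2 bits/cycle.  */
  int j;

  i &= 0xffffffffULL;
  for (j = 0; i && j < 32; j += booth_unit_size)
    {
      i >>= booth_unit_size;
      cost++;
    }
  return cost;
}

int
main (void)
{
  /* 0xff is an encodable ARM immediate (const_ok = 1); 0x12345 is not.  */
  printf ("estimated cost for * 0xff:    %d\n",
          slowmul_const_mult_cost (0xff, 1));
  printf ("estimated cost for * 0x12345: %d\n",
          slowmul_const_mult_cost (0x12345, 0));
  return 0;
}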
10754 /* RTX cost for cores with a fast multiply unit (M variants). */
10756 static bool
10757 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10758 int *total, bool speed)
10760 enum machine_mode mode = GET_MODE (x);
10762 if (TARGET_THUMB1)
10764 *total = thumb1_rtx_costs (x, code, outer_code);
10765 return true;
10768 /* ??? should thumb2 use different costs? */
10769 switch (code)
10771 case MULT:
10772 /* There is no point basing this on the tuning, since it is always the
10773 fast variant if it exists at all. */
10774 if (mode == DImode
10775 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10776 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10777 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10779 *total = COSTS_N_INSNS (2);
10780 return false;
10784 if (mode == DImode)
10786 *total = COSTS_N_INSNS (5);
10787 return false;
10790 if (CONST_INT_P (XEXP (x, 1)))
10792 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10793 & (unsigned HOST_WIDE_INT) 0xffffffff);
10794 int cost, const_ok = const_ok_for_arm (i);
10795 int j, booth_unit_size;
10797 /* Tune as appropriate. */
10798 cost = const_ok ? 4 : 8;
10799 booth_unit_size = 8;
10800 for (j = 0; i && j < 32; j += booth_unit_size)
10802 i >>= booth_unit_size;
10803 cost++;
10806 *total = COSTS_N_INSNS (cost);
10807 return false;
10810 if (mode == SImode)
10812 *total = COSTS_N_INSNS (4);
10813 return false;
10816 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10818 if (TARGET_HARD_FLOAT
10819 && (mode == SFmode
10820 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10822 *total = COSTS_N_INSNS (1);
10823 return false;
10827 /* Requires a lib call */
10828 *total = COSTS_N_INSNS (20);
10829 return false;
10831 default:
10832 return arm_rtx_costs_1 (x, outer_code, total, speed);
10837 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
10838 so it can be ignored. */
10840 static bool
10841 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10842 int *total, bool speed)
10844 enum machine_mode mode = GET_MODE (x);
10846 if (TARGET_THUMB)
10848 *total = thumb1_rtx_costs (x, code, outer_code);
10849 return true;
10852 switch (code)
10854 case COMPARE:
10855 if (GET_CODE (XEXP (x, 0)) != MULT)
10856 return arm_rtx_costs_1 (x, outer_code, total, speed);
10858 /* A COMPARE of a MULT is slow on XScale; the muls instruction
10859 will stall until the multiplication is complete. */
10860 *total = COSTS_N_INSNS (3);
10861 return false;
10863 case MULT:
10864 /* There is no point basing this on the tuning, since it is always the
10865 fast variant if it exists at all. */
10866 if (mode == DImode
10867 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10868 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10869 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10871 *total = COSTS_N_INSNS (2);
10872 return false;
10876 if (mode == DImode)
10878 *total = COSTS_N_INSNS (5);
10879 return false;
10882 if (CONST_INT_P (XEXP (x, 1)))
10884 /* If operand 1 is a constant we can more accurately
10885 calculate the cost of the multiply. The multiplier can
10886 retire 15 bits on the first cycle and a further 12 on the
10887 second. We do, of course, have to load the constant into
10888 a register first. */
10889 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
10890 /* There's a general overhead of one cycle. */
10891 int cost = 1;
10892 unsigned HOST_WIDE_INT masked_const;
10894 if (i & 0x80000000)
10895 i = ~i;
10897 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
10899 masked_const = i & 0xffff8000;
10900 if (masked_const != 0)
10902 cost++;
10903 masked_const = i & 0xf8000000;
10904 if (masked_const != 0)
10905 cost++;
10907 *total = COSTS_N_INSNS (cost);
10908 return false;
10911 if (mode == SImode)
10913 *total = COSTS_N_INSNS (3);
10914 return false;
10917 /* Requires a lib call */
10918 *total = COSTS_N_INSNS (20);
10919 return false;
10921 default:
10922 return arm_rtx_costs_1 (x, outer_code, total, speed);
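/* Worked example (illustrative, not part of arm.c) for the XScale
   constant-multiply estimate above: multiplying by 0x12345 starts from
   the one-cycle general overhead; 0x12345 & 0xffff8000 = 0x10000 is
   non-zero, so a second cycle is added; 0x12345 & 0xf8000000 = 0, so no
   third cycle is added.  The result is COSTS_N_INSNS (2); the cost of
   loading the constant into a register is accounted for separately.  */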
10927 /* RTX costs for 9e (and later) cores. */
10929 static bool
10930 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10931 int *total, bool speed)
10933 enum machine_mode mode = GET_MODE (x);
10935 if (TARGET_THUMB1)
10937 switch (code)
10939 case MULT:
10940 *total = COSTS_N_INSNS (3);
10941 return true;
10943 default:
10944 *total = thumb1_rtx_costs (x, code, outer_code);
10945 return true;
10949 switch (code)
10951 case MULT:
10952 /* There is no point basing this on the tuning, since it is always the
10953 fast variant if it exists at all. */
10954 if (mode == DImode
10955 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10956 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10957 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10959 *total = COSTS_N_INSNS (2);
10960 return false;
10964 if (mode == DImode)
10966 *total = COSTS_N_INSNS (5);
10967 return false;
10970 if (mode == SImode)
10972 *total = COSTS_N_INSNS (2);
10973 return false;
10976 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10978 if (TARGET_HARD_FLOAT
10979 && (mode == SFmode
10980 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10982 *total = COSTS_N_INSNS (1);
10983 return false;
10987 *total = COSTS_N_INSNS (20);
10988 return false;
10990 default:
10991 return arm_rtx_costs_1 (x, outer_code, total, speed);
10994 /* All address computations that can be done are free, but rtx cost returns
10995 the same for practically all of them. So we weight the different types
10996 of address here in the order (most pref first):
10997 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10998 static inline int
10999 arm_arm_address_cost (rtx x)
11001 enum rtx_code c = GET_CODE (x);
11003 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11004 return 0;
11005 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11006 return 10;
11008 if (c == PLUS)
11010 if (CONST_INT_P (XEXP (x, 1)))
11011 return 2;
11013 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11014 return 3;
11016 return 4;
11019 return 6;
11022 static inline int
11023 arm_thumb_address_cost (rtx x)
11025 enum rtx_code c = GET_CODE (x);
11027 if (c == REG)
11028 return 1;
11029 if (c == PLUS
11030 && REG_P (XEXP (x, 0))
11031 && CONST_INT_P (XEXP (x, 1)))
11032 return 1;
11034 return 2;
11037 static int
11038 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11039 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11041 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
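/* Worked examples (illustrative, not part of arm.c) for
   arm_arm_address_cost above: a post-increment address such as [r3], #4
   returns 0; a register-plus-immediate address such as [r3, #8] returns
   2; a register offset involving a shift, e.g. [r3, r2, lsl #2], is a
   PLUS with an arithmetic operand and returns 3; a bare register address
   [r3] falls through to the default cost of 6.  */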
11044 /* Adjust cost hook for XScale. */
11045 static bool
11046 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11048 /* Some true dependencies can have a higher cost depending
11049 on precisely how certain input operands are used. */
11050 if (REG_NOTE_KIND (link) == 0
11051 && recog_memoized (insn) >= 0
11052 && recog_memoized (dep) >= 0)
11054 int shift_opnum = get_attr_shift (insn);
11055 enum attr_type attr_type = get_attr_type (dep);
11057 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11058 operand for INSN. If we have a shifted input operand and the
11059 instruction we depend on is another ALU instruction, then we may
11060 have to account for an additional stall. */
11061 if (shift_opnum != 0
11062 && (attr_type == TYPE_ALU_SHIFT_IMM
11063 || attr_type == TYPE_ALUS_SHIFT_IMM
11064 || attr_type == TYPE_LOGIC_SHIFT_IMM
11065 || attr_type == TYPE_LOGICS_SHIFT_IMM
11066 || attr_type == TYPE_ALU_SHIFT_REG
11067 || attr_type == TYPE_ALUS_SHIFT_REG
11068 || attr_type == TYPE_LOGIC_SHIFT_REG
11069 || attr_type == TYPE_LOGICS_SHIFT_REG
11070 || attr_type == TYPE_MOV_SHIFT
11071 || attr_type == TYPE_MVN_SHIFT
11072 || attr_type == TYPE_MOV_SHIFT_REG
11073 || attr_type == TYPE_MVN_SHIFT_REG))
11075 rtx shifted_operand;
11076 int opno;
11078 /* Get the shifted operand. */
11079 extract_insn (insn);
11080 shifted_operand = recog_data.operand[shift_opnum];
11082 /* Iterate over all the operands in DEP. If we write an operand
11083 that overlaps with SHIFTED_OPERAND, then we have to increase the
11084 cost of this dependency. */
11085 extract_insn (dep);
11086 preprocess_constraints ();
11087 for (opno = 0; opno < recog_data.n_operands; opno++)
11089 /* We can ignore strict inputs. */
11090 if (recog_data.operand_type[opno] == OP_IN)
11091 continue;
11093 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11094 shifted_operand))
11096 *cost = 2;
11097 return false;
11102 return true;
11105 /* Adjust cost hook for Cortex A9. */
11106 static bool
11107 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11109 switch (REG_NOTE_KIND (link))
11111 case REG_DEP_ANTI:
11112 *cost = 0;
11113 return false;
11115 case REG_DEP_TRUE:
11116 case REG_DEP_OUTPUT:
11117 if (recog_memoized (insn) >= 0
11118 && recog_memoized (dep) >= 0)
11120 if (GET_CODE (PATTERN (insn)) == SET)
11122 if (GET_MODE_CLASS
11123 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11124 || GET_MODE_CLASS
11125 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11127 enum attr_type attr_type_insn = get_attr_type (insn);
11128 enum attr_type attr_type_dep = get_attr_type (dep);
11130 /* By default all dependencies of the form
11131 s0 = s0 <op> s1
11132 s0 = s0 <op> s2
11133 have an extra latency of 1 cycle because
11134 of the input and output dependency in this
11135 case. However, this gets modeled as a true
11136 dependency and hence all these checks. */
11137 if (REG_P (SET_DEST (PATTERN (insn)))
11138 && REG_P (SET_DEST (PATTERN (dep)))
11139 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11140 SET_DEST (PATTERN (dep))))
11142 /* FMACS is a special case where the dependent
11143 instruction can be issued 3 cycles earlier than
11144 the normal latency in the case of an output
11145 dependency. */
11146 if ((attr_type_insn == TYPE_FMACS
11147 || attr_type_insn == TYPE_FMACD)
11148 && (attr_type_dep == TYPE_FMACS
11149 || attr_type_dep == TYPE_FMACD))
11151 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11152 *cost = insn_default_latency (dep) - 3;
11153 else
11154 *cost = insn_default_latency (dep);
11155 return false;
11157 else
11159 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11160 *cost = insn_default_latency (dep) + 1;
11161 else
11162 *cost = insn_default_latency (dep);
11164 return false;
11169 break;
11171 default:
11172 gcc_unreachable ();
11175 return true;
11178 /* Adjust cost hook for FA726TE. */
11179 static bool
11180 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11182 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11183 has a penalty of 3. */
11184 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11185 && recog_memoized (insn) >= 0
11186 && recog_memoized (dep) >= 0
11187 && get_attr_conds (dep) == CONDS_SET)
11189 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11190 if (get_attr_conds (insn) == CONDS_USE
11191 && get_attr_type (insn) != TYPE_BRANCH)
11193 *cost = 3;
11194 return false;
11197 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11198 || get_attr_conds (insn) == CONDS_USE)
11200 *cost = 0;
11201 return false;
11205 return true;
11208 /* Implement TARGET_REGISTER_MOVE_COST.
11210 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11211 such a move is typically more expensive than a single memory access.
11212 We set the cost to less than that of two memory accesses so that
11213 floating-point to integer conversion does not go through memory. */
11216 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11217 reg_class_t from, reg_class_t to)
11219 if (TARGET_32BIT)
11221 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11222 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11223 return 15;
11224 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11225 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11226 return 4;
11227 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11228 return 20;
11229 else
11230 return 2;
11232 else
11234 if (from == HI_REGS || to == HI_REGS)
11235 return 4;
11236 else
11237 return 2;
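/* Illustrative arithmetic (not part of arm.c): with the figures above, a
   VFP <-> core-register move costs 15, whereas a round trip through
   memory would cost two moves at 10 each (the TARGET_32BIT value in
   arm_memory_move_cost below), i.e. 20.  Since 15 < 20, the register
   allocator prefers the direct cross-class move, which is what keeps
   floating-point to integer conversions out of memory, as the comment
   above intends.  */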
11241 /* Implement TARGET_MEMORY_MOVE_COST. */
11244 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11245 bool in ATTRIBUTE_UNUSED)
11247 if (TARGET_32BIT)
11248 return 10;
11249 else
11251 if (GET_MODE_SIZE (mode) < 4)
11252 return 8;
11253 else
11254 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11258 /* Vectorizer cost model implementation. */
11260 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11261 static int
11262 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11263 tree vectype,
11264 int misalign ATTRIBUTE_UNUSED)
11266 unsigned elements;
11268 switch (type_of_cost)
11270 case scalar_stmt:
11271 return current_tune->vec_costs->scalar_stmt_cost;
11273 case scalar_load:
11274 return current_tune->vec_costs->scalar_load_cost;
11276 case scalar_store:
11277 return current_tune->vec_costs->scalar_store_cost;
11279 case vector_stmt:
11280 return current_tune->vec_costs->vec_stmt_cost;
11282 case vector_load:
11283 return current_tune->vec_costs->vec_align_load_cost;
11285 case vector_store:
11286 return current_tune->vec_costs->vec_store_cost;
11288 case vec_to_scalar:
11289 return current_tune->vec_costs->vec_to_scalar_cost;
11291 case scalar_to_vec:
11292 return current_tune->vec_costs->scalar_to_vec_cost;
11294 case unaligned_load:
11295 return current_tune->vec_costs->vec_unalign_load_cost;
11297 case unaligned_store:
11298 return current_tune->vec_costs->vec_unalign_store_cost;
11300 case cond_branch_taken:
11301 return current_tune->vec_costs->cond_taken_branch_cost;
11303 case cond_branch_not_taken:
11304 return current_tune->vec_costs->cond_not_taken_branch_cost;
11306 case vec_perm:
11307 case vec_promote_demote:
11308 return current_tune->vec_costs->vec_stmt_cost;
11310 case vec_construct:
11311 elements = TYPE_VECTOR_SUBPARTS (vectype);
11312 return elements / 2 + 1;
11314 default:
11315 gcc_unreachable ();
11319 /* Implement targetm.vectorize.add_stmt_cost. */
11321 static unsigned
11322 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11323 struct _stmt_vec_info *stmt_info, int misalign,
11324 enum vect_cost_model_location where)
11326 unsigned *cost = (unsigned *) data;
11327 unsigned retval = 0;
11329 if (flag_vect_cost_model)
11331 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11332 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11334 /* Statements in an inner loop relative to the loop being
11335 vectorized are weighted more heavily. The value here is
11336 arbitrary and could potentially be improved with analysis. */
11337 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11338 count *= 50; /* FIXME. */
11340 retval = (unsigned) (count * stmt_cost);
11341 cost[where] += retval;
11344 return retval;
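/* Worked example (illustrative, not part of arm.c): for a vect_body
   statement with stmt_cost = 1 that occurs count = 2 times inside an
   inner loop relative to the loop being vectorized, the code above
   records 2 * 50 * 1 = 100 in cost[vect_body], versus only 2 if the
   statement were not in an inner loop.  */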
11347 /* Return true if and only if this insn can dual-issue only as older. */
11348 static bool
11349 cortexa7_older_only (rtx insn)
11351 if (recog_memoized (insn) < 0)
11352 return false;
11354 switch (get_attr_type (insn))
11356 case TYPE_ALU_REG:
11357 case TYPE_ALUS_REG:
11358 case TYPE_LOGIC_REG:
11359 case TYPE_LOGICS_REG:
11360 case TYPE_ADC_REG:
11361 case TYPE_ADCS_REG:
11362 case TYPE_ADR:
11363 case TYPE_BFM:
11364 case TYPE_REV:
11365 case TYPE_MVN_REG:
11366 case TYPE_SHIFT_IMM:
11367 case TYPE_SHIFT_REG:
11368 case TYPE_LOAD_BYTE:
11369 case TYPE_LOAD1:
11370 case TYPE_STORE1:
11371 case TYPE_FFARITHS:
11372 case TYPE_FADDS:
11373 case TYPE_FFARITHD:
11374 case TYPE_FADDD:
11375 case TYPE_FMOV:
11376 case TYPE_F_CVT:
11377 case TYPE_FCMPS:
11378 case TYPE_FCMPD:
11379 case TYPE_FCONSTS:
11380 case TYPE_FCONSTD:
11381 case TYPE_FMULS:
11382 case TYPE_FMACS:
11383 case TYPE_FMULD:
11384 case TYPE_FMACD:
11385 case TYPE_FDIVS:
11386 case TYPE_FDIVD:
11387 case TYPE_F_MRC:
11388 case TYPE_F_MRRC:
11389 case TYPE_F_FLAG:
11390 case TYPE_F_LOADS:
11391 case TYPE_F_STORES:
11392 return true;
11393 default:
11394 return false;
11398 /* Return true if and only if this insn can dual-issue as younger. */
11399 static bool
11400 cortexa7_younger (FILE *file, int verbose, rtx insn)
11402 if (recog_memoized (insn) < 0)
11404 if (verbose > 5)
11405 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11406 return false;
11409 switch (get_attr_type (insn))
11411 case TYPE_ALU_IMM:
11412 case TYPE_ALUS_IMM:
11413 case TYPE_LOGIC_IMM:
11414 case TYPE_LOGICS_IMM:
11415 case TYPE_EXTEND:
11416 case TYPE_MVN_IMM:
11417 case TYPE_MOV_IMM:
11418 case TYPE_MOV_REG:
11419 case TYPE_MOV_SHIFT:
11420 case TYPE_MOV_SHIFT_REG:
11421 case TYPE_BRANCH:
11422 case TYPE_CALL:
11423 return true;
11424 default:
11425 return false;
11430 /* Look for an instruction that can dual issue only as an older
11431 instruction, and move it in front of any instructions that can
11432 dual-issue as younger, while preserving the relative order of all
11433 other instructions in the ready list. This is a heuristic to help
11434 dual-issue in later cycles, by postponing issue of more flexible
11435 instructions. This heuristic may affect dual issue opportunities
11436 in the current cycle. */
11437 static void
11438 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11439 int clock)
11441 int i;
11442 int first_older_only = -1, first_younger = -1;
11444 if (verbose > 5)
11445 fprintf (file,
11446 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11447 clock,
11448 *n_readyp);
11450 /* Traverse the ready list from the head (the instruction to issue
11451 first), looking for the first instruction that can issue as
11452 younger and the first instruction that can dual-issue only as
11453 older. */
11454 for (i = *n_readyp - 1; i >= 0; i--)
11456 rtx insn = ready[i];
11457 if (cortexa7_older_only (insn))
11459 first_older_only = i;
11460 if (verbose > 5)
11461 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11462 break;
11464 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11465 first_younger = i;
11468 /* Nothing to reorder because either no younger insn was found or an
11469 insn that can dual-issue only as older appears before any insn that
11470 can dual-issue as younger. */
11471 if (first_younger == -1)
11473 if (verbose > 5)
11474 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11475 return;
11478 /* Nothing to reorder because no older-only insn in the ready list. */
11479 if (first_older_only == -1)
11481 if (verbose > 5)
11482 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11483 return;
11486 /* Move first_older_only insn before first_younger. */
11487 if (verbose > 5)
11488 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11489 INSN_UID (ready[first_older_only]),
11490 INSN_UID (ready[first_younger]));
11491 rtx first_older_only_insn = ready[first_older_only];
11492 for (i = first_older_only; i < first_younger; i++)
11494 ready[i] = ready[i+1];
11497 ready[i] = first_older_only_insn;
11498 return;
11501 /* Implement TARGET_SCHED_REORDER. */
11502 static int
11503 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11504 int clock)
11506 switch (arm_tune)
11508 case cortexa7:
11509 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11510 break;
11511 default:
11512 /* Do nothing for other cores. */
11513 break;
11516 return arm_issue_rate ();
11519 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11520 It corrects the value of COST based on the relationship between
11521 INSN and DEP through the dependence LINK. It returns the new
11522 value. There is a per-core adjust_cost hook to adjust scheduler costs
11523 and the per-core hook can choose to completely override the generic
11524 adjust_cost function. Only put bits of code into arm_adjust_cost that
11525 are common across all cores. */
11526 static int
11527 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11529 rtx i_pat, d_pat;
11531 /* When generating Thumb-1 code, we want to place flag-setting operations
11532 close to a conditional branch which depends on them, so that we can
11533 omit the comparison. */
11534 if (TARGET_THUMB1
11535 && REG_NOTE_KIND (link) == 0
11536 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11537 && recog_memoized (dep) >= 0
11538 && get_attr_conds (dep) == CONDS_SET)
11539 return 0;
11541 if (current_tune->sched_adjust_cost != NULL)
11543 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11544 return cost;
11547 /* XXX Is this strictly true? */
11548 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11549 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11550 return 0;
11552 /* Call insns don't incur a stall, even if they follow a load. */
11553 if (REG_NOTE_KIND (link) == 0
11554 && CALL_P (insn))
11555 return 1;
11557 if ((i_pat = single_set (insn)) != NULL
11558 && MEM_P (SET_SRC (i_pat))
11559 && (d_pat = single_set (dep)) != NULL
11560 && MEM_P (SET_DEST (d_pat)))
11562 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11563 /* This is a load after a store; there is no conflict if the load reads
11564 from a cached area. Assume that loads from the stack and from the
11565 constant pool are cached, and that others will miss. This is a
11566 hack. */
11568 if ((GET_CODE (src_mem) == SYMBOL_REF
11569 && CONSTANT_POOL_ADDRESS_P (src_mem))
11570 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11571 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11572 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11573 return 1;
11576 return cost;
11580 arm_max_conditional_execute (void)
11582 return max_insns_skipped;
11585 static int
11586 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11588 if (TARGET_32BIT)
11589 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11590 else
11591 return (optimize > 0) ? 2 : 0;
11594 static int
11595 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11597 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11600 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11601 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11602 sequences of non-executed instructions in IT blocks probably take the same
11603 amount of time as executed instructions (and the IT instruction itself takes
11604 space in icache). This function was experimentally determined to give good
11605 results on a popular embedded benchmark. */
11607 static int
11608 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11610 return (TARGET_32BIT && speed_p) ? 1
11611 : arm_default_branch_cost (speed_p, predictable_p);
11614 static bool fp_consts_inited = false;
11616 static REAL_VALUE_TYPE value_fp0;
11618 static void
11619 init_fp_table (void)
11621 REAL_VALUE_TYPE r;
11623 r = REAL_VALUE_ATOF ("0", DFmode);
11624 value_fp0 = r;
11625 fp_consts_inited = true;
11628 /* Return TRUE if rtx X is a valid immediate FP constant. */
11630 arm_const_double_rtx (rtx x)
11632 REAL_VALUE_TYPE r;
11634 if (!fp_consts_inited)
11635 init_fp_table ();
11637 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11638 if (REAL_VALUE_MINUS_ZERO (r))
11639 return 0;
11641 if (REAL_VALUES_EQUAL (r, value_fp0))
11642 return 1;
11644 return 0;
11647 /* VFPv3 has a fairly wide range of representable immediates, formed from
11648 "quarter-precision" floating-point values. These can be evaluated using this
11649 formula (with ^ for exponentiation):
11651 (-1)^s * n * 2^-r
11653 where 's' is a sign bit (0/1), and 'n' and 'r' are integers such that
11654 16 <= n <= 31 and 0 <= r <= 7.
11656 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11658 - A (most-significant) is the sign bit.
11659 - BCD are the exponent (encoded as r XOR 3).
11660 - EFGH are the mantissa (encoded as n - 16).
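/* Illustrative sketch (not part of arm.c): a standalone encoder for the
   8-bit ABCDEFGH form described above, mirroring the final combination
   step of vfp3_const_double_index () below.  The helper name is invented
   for illustration; it assumes the value has already been decomposed
   into s, n and r from the formula (-1)^s * n * 2^-r with 16 <= n <= 31
   and 0 <= r <= 7.  */
#include <assert.h>
#include <stdio.h>

static unsigned
vfp3_quarter_precision_encode (unsigned s, unsigned n, unsigned r)
{
  assert (s <= 1 && n >= 16 && n <= 31 && r <= 7);
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);   /* A | BCD | EFGH.  */
}

int
main (void)
{
  /* 1.0 = 16 * 2^-4, i.e. s = 0, n = 16, r = 4, which encodes as 0x70.  */
  printf ("fconst index for 1.0: 0x%02x\n",
          vfp3_quarter_precision_encode (0, 16, 4));
  return 0;
}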
11663 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11664 fconst[sd] instruction, or -1 if X isn't suitable. */
11665 static int
11666 vfp3_const_double_index (rtx x)
11668 REAL_VALUE_TYPE r, m;
11669 int sign, exponent;
11670 unsigned HOST_WIDE_INT mantissa, mant_hi;
11671 unsigned HOST_WIDE_INT mask;
11672 HOST_WIDE_INT m1, m2;
11673 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11675 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11676 return -1;
11678 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11680 /* We can't represent these things, so detect them first. */
11681 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11682 return -1;
11684 /* Extract sign, exponent and mantissa. */
11685 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11686 r = real_value_abs (&r);
11687 exponent = REAL_EXP (&r);
11688 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11689 highest (sign) bit, with a fixed binary point at bit point_pos.
11690 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11691 bits for the mantissa, this may fail (low bits would be lost). */
11692 real_ldexp (&m, &r, point_pos - exponent);
11693 REAL_VALUE_TO_INT (&m1, &m2, m);
11694 mantissa = m1;
11695 mant_hi = m2;
11697 /* If there are bits set in the low part of the mantissa, we can't
11698 represent this value. */
11699 if (mantissa != 0)
11700 return -1;
11702 /* Now make it so that mantissa contains the most-significant bits, and move
11703 the point_pos to indicate that the least-significant bits have been
11704 discarded. */
11705 point_pos -= HOST_BITS_PER_WIDE_INT;
11706 mantissa = mant_hi;
11708 /* We can permit four significant bits of mantissa only, plus a high bit
11709 which is always 1. */
11710 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11711 if ((mantissa & mask) != 0)
11712 return -1;
11714 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11715 mantissa >>= point_pos - 5;
11717 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11718 floating-point immediate zero with Neon using an integer-zero load, but
11719 that case is handled elsewhere.) */
11720 if (mantissa == 0)
11721 return -1;
11723 gcc_assert (mantissa >= 16 && mantissa <= 31);
11725 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11726 normalized significands are in the range [1, 2). (Our mantissa is shifted
11727 left 4 places at this point relative to normalized IEEE754 values). GCC
11728 internally uses [0.5, 1) (see real.c), so the exponent returned from
11729 REAL_EXP must be altered. */
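   /* For instance (illustrative, not from the original comment): real.c
      represents 1.0 as 0.5 * 2^1, so REAL_EXP returns 1 and the adjustment
      below yields 5 - 1 = 4, matching r = 4 in 1.0 = 16 * 2^-4.  */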
11730 exponent = 5 - exponent;
11732 if (exponent < 0 || exponent > 7)
11733 return -1;
11735 /* Sign, mantissa and exponent are now in the correct form to plug into the
11736 formula described in the comment above. */
11737 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11740 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11742 vfp3_const_double_rtx (rtx x)
11744 if (!TARGET_VFP3)
11745 return 0;
11747 return vfp3_const_double_index (x) != -1;
11750 /* Recognize immediates which can be used in various Neon instructions. Legal
11751 immediates are described by the following table (for VMVN variants, the
11752 bitwise inverse of the constant shown is recognized; in either case, VMOV
11753 is output and the correct instruction to use for a given constant is chosen
11754 by the assembler). The constant shown is replicated across all elements of
11755 the destination vector.
11757 insn elems variant constant (binary)
11758 ---- ----- ------- -----------------
11759 vmov i32 0 00000000 00000000 00000000 abcdefgh
11760 vmov i32 1 00000000 00000000 abcdefgh 00000000
11761 vmov i32 2 00000000 abcdefgh 00000000 00000000
11762 vmov i32 3 abcdefgh 00000000 00000000 00000000
11763 vmov i16 4 00000000 abcdefgh
11764 vmov i16 5 abcdefgh 00000000
11765 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11766 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11767 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11768 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11769 vmvn i16 10 00000000 abcdefgh
11770 vmvn i16 11 abcdefgh 00000000
11771 vmov i32 12 00000000 00000000 abcdefgh 11111111
11772 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11773 vmov i32 14 00000000 abcdefgh 11111111 11111111
11774 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11775 vmov i8 16 abcdefgh
11776 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11777 eeeeeeee ffffffff gggggggg hhhhhhhh
11778 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11779 vmov f32 19 00000000 00000000 00000000 00000000
11781 For case 18, B = !b. Representable values are exactly those accepted by
11782 vfp3_const_double_index, but are output as floating-point numbers rather
11783 than indices.
11785 For case 19, we will change it to vmov.i32 when assembling.
11787 Variants 0-5 (inclusive) may also be used as immediates for the second
11788 operand of VORR/VBIC instructions.
11790 The INVERSE argument causes the bitwise inverse of the given operand to be
11791 recognized instead (used for recognizing legal immediates for the VAND/VORN
11792 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11793 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11794 output, rather than the real insns vbic/vorr).
11796 INVERSE makes no difference to the recognition of float vectors.
11798 The return value is the variant of immediate as shown in the above table, or
11799 -1 if the given value doesn't match any of the listed patterns.
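/* Illustrative examples (ours, not part of the original table): a V4SImode
   vector with every element 0x000000AB splats to the bytes AB 00 00 00 and
   matches variant 0 (vmov.i32, element width 32); every element 0xFFFFFF54
   splats to 54 FF FF FF, which is variant 6, i.e. the VMVN form of
   0x000000AB.  */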
11801 static int
11802 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
11803 rtx *modconst, int *elementwidth)
11805 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11806 matches = 1; \
11807 for (i = 0; i < idx; i += (STRIDE)) \
11808 if (!(TEST)) \
11809 matches = 0; \
11810 if (matches) \
11812 immtype = (CLASS); \
11813 elsize = (ELSIZE); \
11814 break; \
11817 unsigned int i, elsize = 0, idx = 0, n_elts;
11818 unsigned int innersize;
11819 unsigned char bytes[16];
11820 int immtype = -1, matches;
11821 unsigned int invmask = inverse ? 0xff : 0;
11822 bool vector = GET_CODE (op) == CONST_VECTOR;
11824 if (vector)
11826 n_elts = CONST_VECTOR_NUNITS (op);
11827 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
11829 else
11831 n_elts = 1;
11832 if (mode == VOIDmode)
11833 mode = DImode;
11834 innersize = GET_MODE_SIZE (mode);
11837 /* Vectors of float constants. */
11838 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11840 rtx el0 = CONST_VECTOR_ELT (op, 0);
11841 REAL_VALUE_TYPE r0;
11843 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11844 return -1;
11846 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
11848 for (i = 1; i < n_elts; i++)
11850 rtx elt = CONST_VECTOR_ELT (op, i);
11851 REAL_VALUE_TYPE re;
11853 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
11855 if (!REAL_VALUES_EQUAL (r0, re))
11856 return -1;
11859 if (modconst)
11860 *modconst = CONST_VECTOR_ELT (op, 0);
11862 if (elementwidth)
11863 *elementwidth = 0;
11865 if (el0 == CONST0_RTX (GET_MODE (el0)))
11866 return 19;
11867 else
11868 return 18;
11871 /* Splat vector constant out into a byte vector. */
11872 for (i = 0; i < n_elts; i++)
11874 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11875 unsigned HOST_WIDE_INT elpart;
11876 unsigned int part, parts;
11878 if (CONST_INT_P (el))
11880 elpart = INTVAL (el);
11881 parts = 1;
11883 else if (CONST_DOUBLE_P (el))
11885 elpart = CONST_DOUBLE_LOW (el);
11886 parts = 2;
11888 else
11889 gcc_unreachable ();
11891 for (part = 0; part < parts; part++)
11893 unsigned int byte;
11894 for (byte = 0; byte < innersize; byte++)
11896 bytes[idx++] = (elpart & 0xff) ^ invmask;
11897 elpart >>= BITS_PER_UNIT;
11899 if (CONST_DOUBLE_P (el))
11900 elpart = CONST_DOUBLE_HIGH (el);
11904 /* Sanity check. */
11905 gcc_assert (idx == GET_MODE_SIZE (mode));
11909 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11910 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11912 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11913 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11915 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11916 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11918 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11919 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11921 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11923 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11925 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11926 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11928 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11929 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11931 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11932 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11934 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11935 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11937 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11939 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11941 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11942 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11944 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11945 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11947 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11948 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11950 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11951 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11953 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11955 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11956 && bytes[i] == bytes[(i + 8) % idx]);
11958 while (0);
11960 if (immtype == -1)
11961 return -1;
11963 if (elementwidth)
11964 *elementwidth = elsize;
11966 if (modconst)
11968 unsigned HOST_WIDE_INT imm = 0;
11970 /* Un-invert bytes of recognized vector, if necessary. */
11971 if (invmask != 0)
11972 for (i = 0; i < idx; i++)
11973 bytes[i] ^= invmask;
11975 if (immtype == 17)
11977 /* FIXME: Broken on 32-bit H_W_I hosts. */
11978 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11980 for (i = 0; i < 8; i++)
11981 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11982 << (i * BITS_PER_UNIT);
11984 *modconst = GEN_INT (imm);
11986 else
11988 unsigned HOST_WIDE_INT imm = 0;
11990 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11991 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11993 *modconst = GEN_INT (imm);
11997 return immtype;
11998 #undef CHECK
12001 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12002 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12003 float elements), and a modified constant (whatever should be output for a
12004 VMOV) in *MODCONST. */
12007 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12008 rtx *modconst, int *elementwidth)
12010 rtx tmpconst;
12011 int tmpwidth;
12012 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12014 if (retval == -1)
12015 return 0;
12017 if (modconst)
12018 *modconst = tmpconst;
12020 if (elementwidth)
12021 *elementwidth = tmpwidth;
12023 return 1;
12026 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12027 the immediate is valid, write a constant suitable for using as an operand
12028 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12029 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12032 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12033 rtx *modconst, int *elementwidth)
12035 rtx tmpconst;
12036 int tmpwidth;
12037 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12039 if (retval < 0 || retval > 5)
12040 return 0;
12042 if (modconst)
12043 *modconst = tmpconst;
12045 if (elementwidth)
12046 *elementwidth = tmpwidth;
12048 return 1;
12051 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12052 the immediate is valid, write a constant suitable for using as an operand
12053 to VSHR/VSHL to *MODCONST and the corresponding element width to
12054 *ELEMENTWIDTH. ISLEFTSHIFT is true for a left shift and false for a right
12055 shift; the two have different immediate range limitations. */
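/* For example (illustrative): for a V4HImode operand the element width is 16,
   so a left-shift count must be in 0..15 and a right-shift count in 1..16.  */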
12058 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12059 rtx *modconst, int *elementwidth,
12060 bool isleftshift)
12062 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12063 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12064 unsigned HOST_WIDE_INT last_elt = 0;
12065 unsigned HOST_WIDE_INT maxshift;
12067 /* All elements of the vector constant must be the same shift count. */
12068 for (i = 0; i < n_elts; i++)
12070 rtx el = CONST_VECTOR_ELT (op, i);
12071 unsigned HOST_WIDE_INT elpart;
12073 if (CONST_INT_P (el))
12074 elpart = INTVAL (el);
12075 else if (CONST_DOUBLE_P (el))
12076 return 0;
12077 else
12078 gcc_unreachable ();
12080 if (i != 0 && elpart != last_elt)
12081 return 0;
12083 last_elt = elpart;
12086 /* Shift less than element size. */
12087 maxshift = innersize * 8;
12089 if (isleftshift)
12091 /* Left shift immediate value can be from 0 to <size>-1. */
12092 if (last_elt >= maxshift)
12093 return 0;
12095 else
12097 /* Right shift immediate value can be from 1 to <size>. */
12098 if (last_elt == 0 || last_elt > maxshift)
12099 return 0;
12102 if (elementwidth)
12103 *elementwidth = innersize * 8;
12105 if (modconst)
12106 *modconst = CONST_VECTOR_ELT (op, 0);
12108 return 1;
12111 /* Return a string suitable for output of Neon immediate logic operation
12112 MNEM. */
12114 char *
12115 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12116 int inverse, int quad)
12118 int width, is_valid;
12119 static char templ[40];
12121 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12123 gcc_assert (is_valid != 0);
12125 if (quad)
12126 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12127 else
12128 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12130 return templ;
12133 /* Return a string suitable for output of Neon immediate shift operation
12134 (VSHR or VSHL) MNEM. */
12136 char *
12137 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12138 enum machine_mode mode, int quad,
12139 bool isleftshift)
12141 int width, is_valid;
12142 static char templ[40];
12144 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12145 gcc_assert (is_valid != 0);
12147 if (quad)
12148 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12149 else
12150 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12152 return templ;
12155 /* Output a sequence of pairwise operations to implement a reduction.
12156 NOTE: We do "too much work" here, because pairwise operations work on two
12157 registers-worth of operands in one go. Unfortunately, I don't think we can
12158 exploit those extra calculations to do the full operation in fewer steps.
12159 Although all vector elements of the result but the first are ignored, we
12160 actually calculate the same result in each of the elements. An alternative
12161 such as initially loading a vector with zero to use as each of the second
12162 operands would use up an additional register and take an extra instruction,
12163 for no particular gain. */
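/* Sketch of the resulting sequence (illustrative, not from the original
   comment): for a four-element vector the loop below emits two pairwise
   operations, e.g.  t = reduc (op1, op1); op0 = reduc (t, t);  after which
   every element of op0 holds the full reduction.  */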
12165 void
12166 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12167 rtx (*reduc) (rtx, rtx, rtx))
12169 enum machine_mode inner = GET_MODE_INNER (mode);
12170 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12171 rtx tmpsum = op1;
12173 for (i = parts / 2; i >= 1; i /= 2)
12175 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12176 emit_insn (reduc (dest, tmpsum, tmpsum));
12177 tmpsum = dest;
12181 /* If VALS is a vector constant that can be loaded into a register
12182 using VDUP, generate instructions to do so and return an RTX to
12183 assign to the register. Otherwise return NULL_RTX. */
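/* For example (illustrative): the V8QImode constant {5, 5, 5, 5, 5, 5, 5, 5}
   can be loaded by moving 5 into a core register and duplicating it with
   vdup.8, which is what the VEC_DUPLICATE returned below is intended to
   expand to.  */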
12185 static rtx
12186 neon_vdup_constant (rtx vals)
12188 enum machine_mode mode = GET_MODE (vals);
12189 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12190 int n_elts = GET_MODE_NUNITS (mode);
12191 bool all_same = true;
12192 rtx x;
12193 int i;
12195 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12196 return NULL_RTX;
12198 for (i = 0; i < n_elts; ++i)
12200 x = XVECEXP (vals, 0, i);
12201 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12202 all_same = false;
12205 if (!all_same)
12206 /* The elements are not all the same. We could handle repeating
12207 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12208 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12209 vdup.i16). */
12210 return NULL_RTX;
12212 /* We can load this constant by using VDUP and a constant in a
12213 single ARM register. This will be cheaper than a vector
12214 load. */
12216 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12217 return gen_rtx_VEC_DUPLICATE (mode, x);
12220 /* Generate code to load VALS, which is a PARALLEL containing only
12221 constants (for vec_init) or CONST_VECTOR, efficiently into a
12222 register. Returns an RTX to copy into the register, or NULL_RTX
12223 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12226 neon_make_constant (rtx vals)
12228 enum machine_mode mode = GET_MODE (vals);
12229 rtx target;
12230 rtx const_vec = NULL_RTX;
12231 int n_elts = GET_MODE_NUNITS (mode);
12232 int n_const = 0;
12233 int i;
12235 if (GET_CODE (vals) == CONST_VECTOR)
12236 const_vec = vals;
12237 else if (GET_CODE (vals) == PARALLEL)
12239 /* A CONST_VECTOR must contain only CONST_INTs and
12240 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12241 Only store valid constants in a CONST_VECTOR. */
12242 for (i = 0; i < n_elts; ++i)
12244 rtx x = XVECEXP (vals, 0, i);
12245 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12246 n_const++;
12248 if (n_const == n_elts)
12249 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12251 else
12252 gcc_unreachable ();
12254 if (const_vec != NULL
12255 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12256 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12257 return const_vec;
12258 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12259 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12260 pipeline cycle; creating the constant takes one or two ARM
12261 pipeline cycles. */
12262 return target;
12263 else if (const_vec != NULL_RTX)
12264 /* Load from constant pool. On Cortex-A8 this takes two cycles
12265 (for either double or quad vectors). We cannot take advantage
12266 of single-cycle VLD1 because we need a PC-relative addressing
12267 mode. */
12268 return const_vec;
12269 else
12270 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12271 We cannot construct an initializer. */
12272 return NULL_RTX;
12275 /* Initialize vector TARGET to VALS. */
12277 void
12278 neon_expand_vector_init (rtx target, rtx vals)
12280 enum machine_mode mode = GET_MODE (target);
12281 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12282 int n_elts = GET_MODE_NUNITS (mode);
12283 int n_var = 0, one_var = -1;
12284 bool all_same = true;
12285 rtx x, mem;
12286 int i;
12288 for (i = 0; i < n_elts; ++i)
12290 x = XVECEXP (vals, 0, i);
12291 if (!CONSTANT_P (x))
12292 ++n_var, one_var = i;
12294 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12295 all_same = false;
12298 if (n_var == 0)
12300 rtx constant = neon_make_constant (vals);
12301 if (constant != NULL_RTX)
12303 emit_move_insn (target, constant);
12304 return;
12308 /* Splat a single non-constant element if we can. */
12309 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12311 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12312 emit_insn (gen_rtx_SET (VOIDmode, target,
12313 gen_rtx_VEC_DUPLICATE (mode, x)));
12314 return;
12317 /* One field is non-constant. Load constant then overwrite varying
12318 field. This is more efficient than using the stack. */
12319 if (n_var == 1)
12321 rtx copy = copy_rtx (vals);
12322 rtx index = GEN_INT (one_var);
12324 /* Load constant part of vector, substitute neighboring value for
12325 varying element. */
12326 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12327 neon_expand_vector_init (target, copy);
12329 /* Insert variable. */
12330 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12331 switch (mode)
12333 case V8QImode:
12334 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12335 break;
12336 case V16QImode:
12337 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12338 break;
12339 case V4HImode:
12340 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12341 break;
12342 case V8HImode:
12343 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12344 break;
12345 case V2SImode:
12346 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12347 break;
12348 case V4SImode:
12349 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12350 break;
12351 case V2SFmode:
12352 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12353 break;
12354 case V4SFmode:
12355 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12356 break;
12357 case V2DImode:
12358 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12359 break;
12360 default:
12361 gcc_unreachable ();
12363 return;
12366 /* Construct the vector in memory one field at a time
12367 and load the whole vector. */
12368 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12369 for (i = 0; i < n_elts; i++)
12370 emit_move_insn (adjust_address_nv (mem, inner_mode,
12371 i * GET_MODE_SIZE (inner_mode)),
12372 XVECEXP (vals, 0, i));
12373 emit_move_insn (target, mem);
12376 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12377 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12378 reported source locations are bogus. */
12380 static void
12381 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12382 const char *err)
12384 HOST_WIDE_INT lane;
12386 gcc_assert (CONST_INT_P (operand));
12388 lane = INTVAL (operand);
12390 if (lane < low || lane >= high)
12391 error (err);
12394 /* Bounds-check lanes. */
12396 void
12397 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12399 bounds_check (operand, low, high, "lane out of range");
12402 /* Bounds-check constants. */
12404 void
12405 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12407 bounds_check (operand, low, high, "constant out of range");
12410 HOST_WIDE_INT
12411 neon_element_bits (enum machine_mode mode)
12413 if (mode == DImode)
12414 return GET_MODE_BITSIZE (mode);
12415 else
12416 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12420 /* Predicates for `match_operand' and `match_operator'. */
12422 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12423 WB is true if full writeback address modes are allowed and is false
12424 if limited writeback address modes (POST_INC and PRE_DEC) are
12425 allowed. */
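/* Illustrative examples of addresses this accepts (ours, not original): a
   plain register (mem (reg rN)); post-increment and pre-decrement forms; and
   (mem (plus (reg rN) (const_int OFF))) where OFF is a multiple of 4 in the
   range -1020..1020, matching the VLDR/VSTR offset field.  */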
12428 arm_coproc_mem_operand (rtx op, bool wb)
12430 rtx ind;
12432 /* Reject eliminable registers. */
12433 if (! (reload_in_progress || reload_completed)
12434 && ( reg_mentioned_p (frame_pointer_rtx, op)
12435 || reg_mentioned_p (arg_pointer_rtx, op)
12436 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12437 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12438 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12439 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12440 return FALSE;
12442 /* Constants are converted into offsets from labels. */
12443 if (!MEM_P (op))
12444 return FALSE;
12446 ind = XEXP (op, 0);
12448 if (reload_completed
12449 && (GET_CODE (ind) == LABEL_REF
12450 || (GET_CODE (ind) == CONST
12451 && GET_CODE (XEXP (ind, 0)) == PLUS
12452 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12453 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12454 return TRUE;
12456 /* Match: (mem (reg)). */
12457 if (REG_P (ind))
12458 return arm_address_register_rtx_p (ind, 0);
12460 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12461 acceptable in any case (subject to verification by
12462 arm_address_register_rtx_p). We need WB to be true to accept
12463 PRE_INC and POST_DEC. */
12464 if (GET_CODE (ind) == POST_INC
12465 || GET_CODE (ind) == PRE_DEC
12466 || (wb
12467 && (GET_CODE (ind) == PRE_INC
12468 || GET_CODE (ind) == POST_DEC)))
12469 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12471 if (wb
12472 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12473 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12474 && GET_CODE (XEXP (ind, 1)) == PLUS
12475 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12476 ind = XEXP (ind, 1);
12478 /* Match:
12479 (plus (reg)
12480 (const)). */
12481 if (GET_CODE (ind) == PLUS
12482 && REG_P (XEXP (ind, 0))
12483 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12484 && CONST_INT_P (XEXP (ind, 1))
12485 && INTVAL (XEXP (ind, 1)) > -1024
12486 && INTVAL (XEXP (ind, 1)) < 1024
12487 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12488 return TRUE;
12490 return FALSE;
12493 /* Return TRUE if OP is a memory operand which we can load or store a vector
12494 to/from. TYPE is one of the following values:
12495 0 - Vector load/store (vldr)
12496 1 - Core registers (ldm)
12497 2 - Element/structure loads (vld1)
12500 neon_vector_mem_operand (rtx op, int type, bool strict)
12502 rtx ind;
12504 /* Reject eliminable registers. */
12505 if (! (reload_in_progress || reload_completed)
12506 && ( reg_mentioned_p (frame_pointer_rtx, op)
12507 || reg_mentioned_p (arg_pointer_rtx, op)
12508 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12509 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12510 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12511 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12512 return !strict;
12514 /* Constants are converted into offsets from labels. */
12515 if (!MEM_P (op))
12516 return FALSE;
12518 ind = XEXP (op, 0);
12520 if (reload_completed
12521 && (GET_CODE (ind) == LABEL_REF
12522 || (GET_CODE (ind) == CONST
12523 && GET_CODE (XEXP (ind, 0)) == PLUS
12524 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12525 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12526 return TRUE;
12528 /* Match: (mem (reg)). */
12529 if (REG_P (ind))
12530 return arm_address_register_rtx_p (ind, 0);
12532 /* Allow post-increment with Neon registers. */
12533 if ((type != 1 && GET_CODE (ind) == POST_INC)
12534 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12535 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12537 /* FIXME: vld1 allows register post-modify. */
12539 /* Match:
12540 (plus (reg)
12541 (const)). */
12542 if (type == 0
12543 && GET_CODE (ind) == PLUS
12544 && REG_P (XEXP (ind, 0))
12545 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12546 && CONST_INT_P (XEXP (ind, 1))
12547 && INTVAL (XEXP (ind, 1)) > -1024
12548 /* For quad modes, we restrict the constant offset to be slightly less
12549 than what the instruction format permits. We have no such constraint
12550 on double mode offsets. (This must match arm_legitimate_index_p.) */
12551 && (INTVAL (XEXP (ind, 1))
12552 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12553 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12554 return TRUE;
12556 return FALSE;
12559 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12560 type. */
12562 neon_struct_mem_operand (rtx op)
12564 rtx ind;
12566 /* Reject eliminable registers. */
12567 if (! (reload_in_progress || reload_completed)
12568 && ( reg_mentioned_p (frame_pointer_rtx, op)
12569 || reg_mentioned_p (arg_pointer_rtx, op)
12570 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12571 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12572 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12573 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12574 return FALSE;
12576 /* Constants are converted into offsets from labels. */
12577 if (!MEM_P (op))
12578 return FALSE;
12580 ind = XEXP (op, 0);
12582 if (reload_completed
12583 && (GET_CODE (ind) == LABEL_REF
12584 || (GET_CODE (ind) == CONST
12585 && GET_CODE (XEXP (ind, 0)) == PLUS
12586 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12587 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12588 return TRUE;
12590 /* Match: (mem (reg)). */
12591 if (REG_P (ind))
12592 return arm_address_register_rtx_p (ind, 0);
12594 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12595 if (GET_CODE (ind) == POST_INC
12596 || GET_CODE (ind) == PRE_DEC)
12597 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12599 return FALSE;
12602 /* Return true if X is a register that will be eliminated later on. */
12604 arm_eliminable_register (rtx x)
12606 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12607 || REGNO (x) == ARG_POINTER_REGNUM
12608 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12609 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12612 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12613 coprocessor registers. Otherwise return NO_REGS. */
12615 enum reg_class
12616 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12618 if (mode == HFmode)
12620 if (!TARGET_NEON_FP16)
12621 return GENERAL_REGS;
12622 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12623 return NO_REGS;
12624 return GENERAL_REGS;
12627 /* The neon move patterns handle all legitimate vector and struct
12628 addresses. */
12629 if (TARGET_NEON
12630 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12631 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12632 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12633 || VALID_NEON_STRUCT_MODE (mode)))
12634 return NO_REGS;
12636 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12637 return NO_REGS;
12639 return GENERAL_REGS;
12642 /* Values which must be returned in the most-significant end of the return
12643 register. */
12645 static bool
12646 arm_return_in_msb (const_tree valtype)
12648 return (TARGET_AAPCS_BASED
12649 && BYTES_BIG_ENDIAN
12650 && (AGGREGATE_TYPE_P (valtype)
12651 || TREE_CODE (valtype) == COMPLEX_TYPE
12652 || FIXED_POINT_TYPE_P (valtype)));
12655 /* Return TRUE if X references a SYMBOL_REF. */
12657 symbol_mentioned_p (rtx x)
12659 const char * fmt;
12660 int i;
12662 if (GET_CODE (x) == SYMBOL_REF)
12663 return 1;
12665 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12666 are constant offsets, not symbols. */
12667 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12668 return 0;
12670 fmt = GET_RTX_FORMAT (GET_CODE (x));
12672 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12674 if (fmt[i] == 'E')
12676 int j;
12678 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12679 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12680 return 1;
12682 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12683 return 1;
12686 return 0;
12689 /* Return TRUE if X references a LABEL_REF. */
12691 label_mentioned_p (rtx x)
12693 const char * fmt;
12694 int i;
12696 if (GET_CODE (x) == LABEL_REF)
12697 return 1;
12699 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12700 instruction, but they are constant offsets, not symbols. */
12701 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12702 return 0;
12704 fmt = GET_RTX_FORMAT (GET_CODE (x));
12705 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12707 if (fmt[i] == 'E')
12709 int j;
12711 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12712 if (label_mentioned_p (XVECEXP (x, i, j)))
12713 return 1;
12715 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12716 return 1;
12719 return 0;
12723 tls_mentioned_p (rtx x)
12725 switch (GET_CODE (x))
12727 case CONST:
12728 return tls_mentioned_p (XEXP (x, 0));
12730 case UNSPEC:
12731 if (XINT (x, 1) == UNSPEC_TLS)
12732 return 1;
12734 default:
12735 return 0;
12739 /* Must not copy any rtx that uses a pc-relative address. */
12741 static int
12742 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
12744 if (GET_CODE (*x) == UNSPEC
12745 && (XINT (*x, 1) == UNSPEC_PIC_BASE
12746 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
12747 return 1;
12748 return 0;
12751 static bool
12752 arm_cannot_copy_insn_p (rtx insn)
12754 /* The tls call insn cannot be copied, as it is paired with a data
12755 word. */
12756 if (recog_memoized (insn) == CODE_FOR_tlscall)
12757 return true;
12759 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
12762 enum rtx_code
12763 minmax_code (rtx x)
12765 enum rtx_code code = GET_CODE (x);
12767 switch (code)
12769 case SMAX:
12770 return GE;
12771 case SMIN:
12772 return LE;
12773 case UMIN:
12774 return LEU;
12775 case UMAX:
12776 return GEU;
12777 default:
12778 gcc_unreachable ();
12782 /* Match pair of min/max operators that can be implemented via usat/ssat. */
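/* For instance (illustrative): bounds [0, 255] match the unsigned case with
   *MASK = 8, and bounds [-128, 127] match the signed case with *MASK = 8 and
   *SIGNED_SAT set.  */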
12784 bool
12785 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12786 int *mask, bool *signed_sat)
12788 /* The high bound must be a power of two minus one. */
12789 int log = exact_log2 (INTVAL (hi_bound) + 1);
12790 if (log == -1)
12791 return false;
12793 /* The low bound is either zero (for usat) or one less than the
12794 negation of the high bound (for ssat). */
12795 if (INTVAL (lo_bound) == 0)
12797 if (mask)
12798 *mask = log;
12799 if (signed_sat)
12800 *signed_sat = false;
12802 return true;
12805 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12807 if (mask)
12808 *mask = log + 1;
12809 if (signed_sat)
12810 *signed_sat = true;
12812 return true;
12815 return false;
12818 /* Return 1 if memory locations are adjacent. */
12820 adjacent_mem_locations (rtx a, rtx b)
12822 /* We don't guarantee to preserve the order of these memory refs. */
12823 if (volatile_refs_p (a) || volatile_refs_p (b))
12824 return 0;
12826 if ((REG_P (XEXP (a, 0))
12827 || (GET_CODE (XEXP (a, 0)) == PLUS
12828 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12829 && (REG_P (XEXP (b, 0))
12830 || (GET_CODE (XEXP (b, 0)) == PLUS
12831 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12833 HOST_WIDE_INT val0 = 0, val1 = 0;
12834 rtx reg0, reg1;
12835 int val_diff;
12837 if (GET_CODE (XEXP (a, 0)) == PLUS)
12839 reg0 = XEXP (XEXP (a, 0), 0);
12840 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12842 else
12843 reg0 = XEXP (a, 0);
12845 if (GET_CODE (XEXP (b, 0)) == PLUS)
12847 reg1 = XEXP (XEXP (b, 0), 0);
12848 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12850 else
12851 reg1 = XEXP (b, 0);
12853 /* Don't accept any offset that will require multiple
12854 instructions to handle, since this would cause the
12855 arith_adjacentmem pattern to output an overlong sequence. */
12856 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12857 return 0;
12859 /* Don't allow an eliminable register: register elimination can make
12860 the offset too large. */
12861 if (arm_eliminable_register (reg0))
12862 return 0;
12864 val_diff = val1 - val0;
12866 if (arm_ld_sched)
12868 /* If the target has load delay slots, then there's no benefit
12869 to using an ldm instruction unless the offset is zero and
12870 we are optimizing for size. */
12871 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12872 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12873 && (val_diff == 4 || val_diff == -4));
12876 return ((REGNO (reg0) == REGNO (reg1))
12877 && (val_diff == 4 || val_diff == -4));
12880 return 0;
12883 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12884 for load operations, false for store operations. CONSECUTIVE is true
12885 if the register numbers in the operation must be consecutive in the register
12886 bank. RETURN_PC is true if the value is to be loaded into the PC.
12887 The pattern we are trying to match for load is:
12888 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12889 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12892 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12894 where
12895 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12896 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12897 3. If consecutive is TRUE, then for kth register being loaded,
12898 REGNO (R_dk) = REGNO (R_d0) + k.
12899 The pattern for store is similar. */
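/* A minimal example of an RTL pattern this accepts (illustrative; register
   numbers are arbitrary):

       (parallel [(set (reg:SI r4) (mem:SI (reg:SI r0)))
                  (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0)
                                                    (const_int 4))))])

   i.e. a two-register LDM from r0 with ascending register numbers.  */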
12900 bool
12901 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
12902 bool consecutive, bool return_pc)
12904 HOST_WIDE_INT count = XVECLEN (op, 0);
12905 rtx reg, mem, addr;
12906 unsigned regno;
12907 unsigned first_regno;
12908 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12909 rtx elt;
12910 bool addr_reg_in_reglist = false;
12911 bool update = false;
12912 int reg_increment;
12913 int offset_adj;
12914 int regs_per_val;
12916 /* If not in SImode, then registers must be consecutive
12917 (e.g., VLDM instructions for DFmode). */
12918 gcc_assert ((mode == SImode) || consecutive);
12919 /* Setting return_pc for stores is illegal. */
12920 gcc_assert (!return_pc || load);
12922 /* Set up the increments and the regs per val based on the mode. */
12923 reg_increment = GET_MODE_SIZE (mode);
12924 regs_per_val = reg_increment / 4;
12925 offset_adj = return_pc ? 1 : 0;
12927 if (count <= 1
12928 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12929 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12930 return false;
12932 /* Check if this is a write-back. */
12933 elt = XVECEXP (op, 0, offset_adj);
12934 if (GET_CODE (SET_SRC (elt)) == PLUS)
12936 i++;
12937 base = 1;
12938 update = true;
12940 /* The offset adjustment must be the number of registers being
12941 popped times the size of a single register. */
12942 if (!REG_P (SET_DEST (elt))
12943 || !REG_P (XEXP (SET_SRC (elt), 0))
12944 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12945 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12946 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12947 ((count - 1 - offset_adj) * reg_increment))
12948 return false;
12951 i = i + offset_adj;
12952 base = base + offset_adj;
12953 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12954 success depends on the type: VLDM can do just one reg,
12955 LDM must do at least two. */
12956 if ((count <= i) && (mode == SImode))
12957 return false;
12959 elt = XVECEXP (op, 0, i - 1);
12960 if (GET_CODE (elt) != SET)
12961 return false;
12963 if (load)
12965 reg = SET_DEST (elt);
12966 mem = SET_SRC (elt);
12968 else
12970 reg = SET_SRC (elt);
12971 mem = SET_DEST (elt);
12974 if (!REG_P (reg) || !MEM_P (mem))
12975 return false;
12977 regno = REGNO (reg);
12978 first_regno = regno;
12979 addr = XEXP (mem, 0);
12980 if (GET_CODE (addr) == PLUS)
12982 if (!CONST_INT_P (XEXP (addr, 1)))
12983 return false;
12985 offset = INTVAL (XEXP (addr, 1));
12986 addr = XEXP (addr, 0);
12989 if (!REG_P (addr))
12990 return false;
12992 /* Don't allow SP to be loaded unless it is also the base register. It
12993 guarantees that SP is reset correctly when an LDM instruction
12994 is interrupted. Otherwise, we might end up with a corrupt stack. */
12995 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12996 return false;
12998 for (; i < count; i++)
13000 elt = XVECEXP (op, 0, i);
13001 if (GET_CODE (elt) != SET)
13002 return false;
13004 if (load)
13006 reg = SET_DEST (elt);
13007 mem = SET_SRC (elt);
13009 else
13011 reg = SET_SRC (elt);
13012 mem = SET_DEST (elt);
13015 if (!REG_P (reg)
13016 || GET_MODE (reg) != mode
13017 || REGNO (reg) <= regno
13018 || (consecutive
13019 && (REGNO (reg) !=
13020 (unsigned int) (first_regno + regs_per_val * (i - base))))
13021 /* Don't allow SP to be loaded unless it is also the base register. It
13022 guarantees that SP is reset correctly when an LDM instruction
13023 is interrupted. Otherwise, we might end up with a corrupt stack. */
13024 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13025 || !MEM_P (mem)
13026 || GET_MODE (mem) != mode
13027 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13028 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13029 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13030 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13031 offset + (i - base) * reg_increment))
13032 && (!REG_P (XEXP (mem, 0))
13033 || offset + (i - base) * reg_increment != 0)))
13034 return false;
13036 regno = REGNO (reg);
13037 if (regno == REGNO (addr))
13038 addr_reg_in_reglist = true;
13041 if (load)
13043 if (update && addr_reg_in_reglist)
13044 return false;
13046 /* For Thumb-1, the address register is always modified - either by write-back
13047 or by explicit load. If the pattern does not describe an update,
13048 then the address register must be in the list of loaded registers. */
13049 if (TARGET_THUMB1)
13050 return update || addr_reg_in_reglist;
13053 return true;
13056 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13057 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13058 instruction. ADD_OFFSET is nonzero if the base address register needs
13059 to be modified with an add instruction before we can use it. */
13061 static bool
13062 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13063 int nops, HOST_WIDE_INT add_offset)
13065 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13066 if the offset isn't small enough. The reason 2 ldrs are faster
13067 is because these ARMs are able to do more than one cache access
13068 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13069 whilst the ARM8 has a double bandwidth cache. This means that
13070 these cores can do both an instruction fetch and a data fetch in
13071 a single cycle, so the trick of calculating the address into a
13072 scratch register (one of the result regs) and then doing a load
13073 multiple actually becomes slower (and no smaller in code size).
13074 That is the transformation
13076 ldr rd1, [rbase + offset]
13077 ldr rd2, [rbase + offset + 4]
13081 add rd1, rbase, offset
13082 ldmia rd1, {rd1, rd2}
13084 produces worse code -- '3 cycles + any stalls on rd2' instead of
13085 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13086 access per cycle, the first sequence could never complete in less
13087 than 6 cycles, whereas the ldm sequence would only take 5 and
13088 would make better use of sequential accesses if not hitting the
13089 cache.
13091 We cheat here and test 'arm_ld_sched' which we currently know to
13092 only be true for the ARM8, ARM9 and StrongARM. If this ever
13093 changes, then the test below needs to be reworked. */
13094 if (nops == 2 && arm_ld_sched && add_offset != 0)
13095 return false;
13097 /* XScale has load-store double instructions, but they have stricter
13098 alignment requirements than load-store multiple, so we cannot
13099 use them.
13101 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13102 the pipeline until completion.
13104 NREGS CYCLES
13110 An ldr instruction takes 1-3 cycles, but does not block the
13111 pipeline.
13113 NREGS CYCLES
13114 1 1-3
13115 2 2-6
13116 3 3-9
13117 4 4-12
13119 Best case ldr will always win. However, the more ldr instructions
13120 we issue, the less likely we are to be able to schedule them well.
13121 Using ldr instructions also increases code size.
13123 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13124 for counts of 3 or 4 regs. */
13125 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13126 return false;
13127 return true;
13130 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13131 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13132 an array ORDER which describes the sequence to use when accessing the
13133 offsets that produces an ascending order. In this sequence, each
13134 offset must be larger by exactly 4 than the previous one. ORDER[0]
13135 must have been filled in with the lowest offset by the caller.
13136 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13137 we use to verify that ORDER produces an ascending order of registers.
13138 Return true if it was possible to construct such an order, false if
13139 not. */
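/* Worked example (illustrative): for UNSORTED_OFFSETS = {8, 4, 12, 0} the
   caller sets ORDER[0] = 3 (the index of offset 0), and this function fills
   in ORDER = {3, 1, 0, 2}, visiting the offsets as 0, 4, 8, 12.  */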
13141 static bool
13142 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13143 int *unsorted_regs)
13145 int i;
13146 for (i = 1; i < nops; i++)
13148 int j;
13150 order[i] = order[i - 1];
13151 for (j = 0; j < nops; j++)
13152 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13154 /* We must find exactly one offset that is higher than the
13155 previous one by 4. */
13156 if (order[i] != order[i - 1])
13157 return false;
13158 order[i] = j;
13160 if (order[i] == order[i - 1])
13161 return false;
13162 /* The register numbers must be ascending. */
13163 if (unsorted_regs != NULL
13164 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13165 return false;
13167 return true;
13170 /* Used to determine in a peephole whether a sequence of load
13171 instructions can be changed into a load-multiple instruction.
13172 NOPS is the number of separate load instructions we are examining. The
13173 first NOPS entries in OPERANDS are the destination registers, the
13174 next NOPS entries are memory operands. If this function is
13175 successful, *BASE is set to the common base register of the memory
13176 accesses; *LOAD_OFFSET is set to the first memory location's offset
13177 from that base register.
13178 REGS is an array filled in with the destination register numbers.
13179 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13180 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13181 the sequence of registers in REGS matches the loads from ascending memory
13182 locations, and the function verifies that the register numbers are
13183 themselves ascending. If CHECK_REGS is false, the register numbers
13184 are stored in the order they are found in the operands. */
13185 static int
13186 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13187 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13189 int unsorted_regs[MAX_LDM_STM_OPS];
13190 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13191 int order[MAX_LDM_STM_OPS];
13192 rtx base_reg_rtx = NULL;
13193 int base_reg = -1;
13194 int i, ldm_case;
13196 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13197 easily extended if required. */
13198 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13200 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13202 /* Loop over the operands and check that the memory references are
13203 suitable (i.e. immediate offsets from the same base register). At
13204 the same time, extract the target register, and the memory
13205 offsets. */
13206 for (i = 0; i < nops; i++)
13208 rtx reg;
13209 rtx offset;
13211 /* Convert a subreg of a mem into the mem itself. */
13212 if (GET_CODE (operands[nops + i]) == SUBREG)
13213 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13215 gcc_assert (MEM_P (operands[nops + i]));
13217 /* Don't reorder volatile memory references; it doesn't seem worth
13218 looking for the case where the order is ok anyway. */
13219 if (MEM_VOLATILE_P (operands[nops + i]))
13220 return 0;
13222 offset = const0_rtx;
13224 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13225 || (GET_CODE (reg) == SUBREG
13226 && REG_P (reg = SUBREG_REG (reg))))
13227 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13228 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13229 || (GET_CODE (reg) == SUBREG
13230 && REG_P (reg = SUBREG_REG (reg))))
13231 && (CONST_INT_P (offset
13232 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13234 if (i == 0)
13236 base_reg = REGNO (reg);
13237 base_reg_rtx = reg;
13238 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13239 return 0;
13241 else if (base_reg != (int) REGNO (reg))
13242 /* Not addressed from the same base register. */
13243 return 0;
13245 unsorted_regs[i] = (REG_P (operands[i])
13246 ? REGNO (operands[i])
13247 : REGNO (SUBREG_REG (operands[i])));
13249 /* If it isn't an integer register, or if it overwrites the
13250 base register but isn't the last insn in the list, then
13251 we can't do this. */
13252 if (unsorted_regs[i] < 0
13253 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13254 || unsorted_regs[i] > 14
13255 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13256 return 0;
13258 /* Don't allow SP to be loaded unless it is also the base
13259 register. It guarantees that SP is reset correctly when
13260 an LDM instruction is interrupted. Otherwise, we might
13261 end up with a corrupt stack. */
13262 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13263 return 0;
13265 unsorted_offsets[i] = INTVAL (offset);
13266 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13267 order[0] = i;
13269 else
13270 /* Not a suitable memory address. */
13271 return 0;
13274 /* All the useful information has now been extracted from the
13275 operands into unsorted_regs and unsorted_offsets; additionally,
13276 order[0] has been set to the lowest offset in the list. Sort
13277 the offsets into order, verifying that they are adjacent, and
13278 check that the register numbers are ascending. */
13279 if (!compute_offset_order (nops, unsorted_offsets, order,
13280 check_regs ? unsorted_regs : NULL))
13281 return 0;
13283 if (saved_order)
13284 memcpy (saved_order, order, sizeof order);
13286 if (base)
13288 *base = base_reg;
13290 for (i = 0; i < nops; i++)
13291 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13293 *load_offset = unsorted_offsets[order[0]];
13296 if (TARGET_THUMB1
13297 && !peep2_reg_dead_p (nops, base_reg_rtx))
13298 return 0;
13300 if (unsorted_offsets[order[0]] == 0)
13301 ldm_case = 1; /* ldmia */
13302 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13303 ldm_case = 2; /* ldmib */
13304 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13305 ldm_case = 3; /* ldmda */
13306 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13307 ldm_case = 4; /* ldmdb */
13308 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13309 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13310 ldm_case = 5;
13311 else
13312 return 0;
13314 if (!multiple_operation_profitable_p (false, nops,
13315 ldm_case == 5
13316 ? unsorted_offsets[order[0]] : 0))
13317 return 0;
13319 return ldm_case;
13322 /* Used to determine in a peephole whether a sequence of store instructions can
13323 be changed into a store-multiple instruction.
13324 NOPS is the number of separate store instructions we are examining.
13325 NOPS_TOTAL is the total number of instructions recognized by the peephole
13326 pattern.
13327 The first NOPS entries in OPERANDS are the source registers, the next
13328 NOPS entries are memory operands. If this function is successful, *BASE is
13329 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13330 to the first memory location's offset from that base register. REGS is an
13331 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13332 likewise filled with the corresponding rtx's.
13333 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13334 numbers to an ascending order of stores.
13335 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13336 from ascending memory locations, and the function verifies that the register
13337 numbers are themselves ascending. If CHECK_REGS is false, the register
13338 numbers are stored in the order they are found in the operands. */
13339 static int
13340 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13341 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13342 HOST_WIDE_INT *load_offset, bool check_regs)
13344 int unsorted_regs[MAX_LDM_STM_OPS];
13345 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13346 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13347 int order[MAX_LDM_STM_OPS];
13348 int base_reg = -1;
13349 rtx base_reg_rtx = NULL;
13350 int i, stm_case;
13352 /* Write back of base register is currently only supported for Thumb 1. */
13353 int base_writeback = TARGET_THUMB1;
13355 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13356 easily extended if required. */
13357 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13359 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13361 /* Loop over the operands and check that the memory references are
13362 suitable (i.e. immediate offsets from the same base register). At
13363 the same time, extract the target register, and the memory
13364 offsets. */
13365 for (i = 0; i < nops; i++)
13367 rtx reg;
13368 rtx offset;
13370 /* Convert a subreg of a mem into the mem itself. */
13371 if (GET_CODE (operands[nops + i]) == SUBREG)
13372 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13374 gcc_assert (MEM_P (operands[nops + i]));
13376 /* Don't reorder volatile memory references; it doesn't seem worth
13377 looking for the case where the order is ok anyway. */
13378 if (MEM_VOLATILE_P (operands[nops + i]))
13379 return 0;
13381 offset = const0_rtx;
13383 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13384 || (GET_CODE (reg) == SUBREG
13385 && REG_P (reg = SUBREG_REG (reg))))
13386 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13387 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13388 || (GET_CODE (reg) == SUBREG
13389 && REG_P (reg = SUBREG_REG (reg))))
13390 && (CONST_INT_P (offset
13391 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13393 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13394 ? operands[i] : SUBREG_REG (operands[i]));
13395 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13397 if (i == 0)
13399 base_reg = REGNO (reg);
13400 base_reg_rtx = reg;
13401 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13402 return 0;
13404 else if (base_reg != (int) REGNO (reg))
13405 /* Not addressed from the same base register. */
13406 return 0;
13408 /* If it isn't an integer register, then we can't do this. */
13409 if (unsorted_regs[i] < 0
13410 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13411 /* The effects are unpredictable if the base register is
13412 both updated and stored. */
13413 || (base_writeback && unsorted_regs[i] == base_reg)
13414 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13415 || unsorted_regs[i] > 14)
13416 return 0;
13418 unsorted_offsets[i] = INTVAL (offset);
13419 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13420 order[0] = i;
13422 else
13423 /* Not a suitable memory address. */
13424 return 0;
13427 /* All the useful information has now been extracted from the
13428 operands into unsorted_regs and unsorted_offsets; additionally,
13429 order[0] has been set to the lowest offset in the list. Sort
13430 the offsets into order, verifying that they are adjacent, and
13431 check that the register numbers are ascending. */
13432 if (!compute_offset_order (nops, unsorted_offsets, order,
13433 check_regs ? unsorted_regs : NULL))
13434 return 0;
13436 if (saved_order)
13437 memcpy (saved_order, order, sizeof order);
13439 if (base)
13441 *base = base_reg;
13443 for (i = 0; i < nops; i++)
13445 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13446 if (reg_rtxs)
13447 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13450 *load_offset = unsorted_offsets[order[0]];
13453 if (TARGET_THUMB1
13454 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13455 return 0;
13457 if (unsorted_offsets[order[0]] == 0)
13458 stm_case = 1; /* stmia */
13459 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13460 stm_case = 2; /* stmib */
13461 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13462 stm_case = 3; /* stmda */
13463 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13464 stm_case = 4; /* stmdb */
13465 else
13466 return 0;
13468 if (!multiple_operation_profitable_p (false, nops, 0))
13469 return 0;
13471 return stm_case;
13474 /* Routines for use in generating RTL. */
13476 /* Generate a load-multiple instruction. COUNT is the number of loads in
13477 the instruction; REGS and MEMS are arrays containing the operands.
13478 BASEREG is the base register to be used in addressing the memory operands.
13479 WBACK_OFFSET is nonzero if the instruction should update the base
13480 register. */
13482 static rtx
13483 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13484 HOST_WIDE_INT wback_offset)
13486 int i = 0, j;
13487 rtx result;
13489 if (!multiple_operation_profitable_p (false, count, 0))
13491 rtx seq;
13493 start_sequence ();
13495 for (i = 0; i < count; i++)
13496 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13498 if (wback_offset != 0)
13499 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13501 seq = get_insns ();
13502 end_sequence ();
13504 return seq;
13507 result = gen_rtx_PARALLEL (VOIDmode,
13508 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13509 if (wback_offset != 0)
13511 XVECEXP (result, 0, 0)
13512 = gen_rtx_SET (VOIDmode, basereg,
13513 plus_constant (Pmode, basereg, wback_offset));
13514 i = 1;
13515 count++;
13518 for (j = 0; i < count; i++, j++)
13519 XVECEXP (result, 0, i)
13520 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13522 return result;
13525 /* Generate a store-multiple instruction. COUNT is the number of stores in
13526 the instruction; REGS and MEMS are arrays containing the operands.
13527 BASEREG is the base register to be used in addressing the memory operands.
13528 WBACK_OFFSET is nonzero if the instruction should update the base
13529 register. */
13531 static rtx
13532 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13533 HOST_WIDE_INT wback_offset)
13535 int i = 0, j;
13536 rtx result;
13538 if (GET_CODE (basereg) == PLUS)
13539 basereg = XEXP (basereg, 0);
13541 if (!multiple_operation_profitable_p (false, count, 0))
13543 rtx seq;
13545 start_sequence ();
13547 for (i = 0; i < count; i++)
13548 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13550 if (wback_offset != 0)
13551 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13553 seq = get_insns ();
13554 end_sequence ();
13556 return seq;
13559 result = gen_rtx_PARALLEL (VOIDmode,
13560 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13561 if (wback_offset != 0)
13563 XVECEXP (result, 0, 0)
13564 = gen_rtx_SET (VOIDmode, basereg,
13565 plus_constant (Pmode, basereg, wback_offset));
13566 i = 1;
13567 count++;
13570 for (j = 0; i < count; i++, j++)
13571 XVECEXP (result, 0, i)
13572 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13574 return result;
13577 /* Generate either a load-multiple or a store-multiple instruction. This
13578 function can be used in situations where we can start with a single MEM
13579 rtx and adjust its address upwards.
13580 COUNT is the number of operations in the instruction, not counting a
13581 possible update of the base register. REGS is an array containing the
13582 register operands.
13583 BASEREG is the base register to be used in addressing the memory operands,
13584 which are constructed from BASEMEM.
13585 WRITE_BACK specifies whether the generated instruction should include an
13586 update of the base register.
13587 OFFSETP is used to pass an offset to and from this function; this offset
13588 is not used when constructing the address (instead BASEMEM should have an
13589 appropriate offset in its address), it is used only for setting
13590 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13592 static rtx
13593 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13594 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13596 rtx mems[MAX_LDM_STM_OPS];
13597 HOST_WIDE_INT offset = *offsetp;
13598 int i;
13600 gcc_assert (count <= MAX_LDM_STM_OPS);
13602 if (GET_CODE (basereg) == PLUS)
13603 basereg = XEXP (basereg, 0);
13605 for (i = 0; i < count; i++)
13607 rtx addr = plus_constant (Pmode, basereg, i * 4);
13608 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13609 offset += 4;
13612 if (write_back)
13613 *offsetp = offset;
13615 if (is_load)
13616 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13617 write_back ? 4 * count : 0);
13618 else
13619 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13620 write_back ? 4 * count : 0);
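/* An informal example (hypothetical arguments): a call such as
   arm_gen_multiple_op (true, regs, 4, base, true, basemem, &off) builds
   mems[0..3] from BASEMEM at addresses base, base+4, base+8 and base+12,
   advances *OFFSETP by 16 because WRITE_BACK is true, and hands the arrays
   to arm_gen_load_multiple_1 with a write-back offset of 16.  */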
13624 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13625 rtx basemem, HOST_WIDE_INT *offsetp)
13627 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13628 offsetp);
13632 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13633 rtx basemem, HOST_WIDE_INT *offsetp)
13635 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13636 offsetp);
13639 /* Called from a peephole2 expander to turn a sequence of loads into an
13640 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13641 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13642 is true if we can reorder the registers because they are used commutatively
13643 subsequently.
13644 Returns true iff we could generate a new instruction. */
13646 bool
13647 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13649 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13650 rtx mems[MAX_LDM_STM_OPS];
13651 int i, j, base_reg;
13652 rtx base_reg_rtx;
13653 HOST_WIDE_INT offset;
13654 int write_back = FALSE;
13655 int ldm_case;
13656 rtx addr;
13658 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13659 &base_reg, &offset, !sort_regs);
13661 if (ldm_case == 0)
13662 return false;
13664 if (sort_regs)
13665 for (i = 0; i < nops - 1; i++)
13666 for (j = i + 1; j < nops; j++)
13667 if (regs[i] > regs[j])
13669 int t = regs[i];
13670 regs[i] = regs[j];
13671 regs[j] = t;
13673 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13675 if (TARGET_THUMB1)
13677 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13678 gcc_assert (ldm_case == 1 || ldm_case == 5);
13679 write_back = TRUE;
13682 if (ldm_case == 5)
13684 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13685 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13686 offset = 0;
13687 if (!TARGET_THUMB1)
13689 base_reg = regs[0];
13690 base_reg_rtx = newbase;
13694 for (i = 0; i < nops; i++)
13696 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13697 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13698 SImode, addr, 0);
13700 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13701 write_back ? offset + i * 4 : 0));
13702 return true;
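/* An informal example of the peephole (hypothetical registers): a matched
   sequence such as

     ldr r5, [r1]         ldr r4, [r1, #4]
     ldr r6, [r1, #8]     ldr r7, [r1, #12]

   with SORT_REGS true is rewritten so that ascending registers pair with
   ascending addresses and is replaced by "ldmia r1, {r4-r7}"; for ldm_case 5
   the lowest offset is first added into a new base register and the ldm then
   uses an offset of zero.  */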
13705 /* Called from a peephole2 expander to turn a sequence of stores into an
13706 STM instruction. OPERANDS are the operands found by the peephole matcher;
13707 NOPS indicates how many separate stores we are trying to combine.
13708 Returns true iff we could generate a new instruction. */
13710 bool
13711 gen_stm_seq (rtx *operands, int nops)
13713 int i;
13714 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13715 rtx mems[MAX_LDM_STM_OPS];
13716 int base_reg;
13717 rtx base_reg_rtx;
13718 HOST_WIDE_INT offset;
13719 int write_back = FALSE;
13720 int stm_case;
13721 rtx addr;
13722 bool base_reg_dies;
13724 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13725 mem_order, &base_reg, &offset, true);
13727 if (stm_case == 0)
13728 return false;
13730 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13732 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13733 if (TARGET_THUMB1)
13735 gcc_assert (base_reg_dies);
13736 write_back = TRUE;
13739 if (stm_case == 5)
13741 gcc_assert (base_reg_dies);
13742 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13743 offset = 0;
13746 addr = plus_constant (Pmode, base_reg_rtx, offset);
13748 for (i = 0; i < nops; i++)
13750 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13751 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13752 SImode, addr, 0);
13754 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13755 write_back ? offset + i * 4 : 0));
13756 return true;
13759 /* Called from a peephole2 expander to turn a sequence of stores that are
13760 preceded by constant loads into an STM instruction. OPERANDS are the
13761 operands found by the peephole matcher; NOPS indicates how many
13762 separate stores we are trying to combine; there are 2 * NOPS
13763 instructions in the peephole.
13764 Returns true iff we could generate a new instruction. */
13766 bool
13767 gen_const_stm_seq (rtx *operands, int nops)
13769 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13770 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13771 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13772 rtx mems[MAX_LDM_STM_OPS];
13773 int base_reg;
13774 rtx base_reg_rtx;
13775 HOST_WIDE_INT offset;
13776 int write_back = FALSE;
13777 int stm_case;
13778 rtx addr;
13779 bool base_reg_dies;
13780 int i, j;
13781 HARD_REG_SET allocated;
13783 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13784 mem_order, &base_reg, &offset, false);
13786 if (stm_case == 0)
13787 return false;
13789 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13791 /* If the same register is used more than once, try to find a free
13792 register. */
13793 CLEAR_HARD_REG_SET (allocated);
13794 for (i = 0; i < nops; i++)
13796 for (j = i + 1; j < nops; j++)
13797 if (regs[i] == regs[j])
13799 rtx t = peep2_find_free_register (0, nops * 2,
13800 TARGET_THUMB1 ? "l" : "r",
13801 SImode, &allocated);
13802 if (t == NULL_RTX)
13803 return false;
13804 reg_rtxs[i] = t;
13805 regs[i] = REGNO (t);
13809 /* Compute an ordering that maps the register numbers to an ascending
13810 sequence. */
13811 reg_order[0] = 0;
13812 for (i = 0; i < nops; i++)
13813 if (regs[i] < regs[reg_order[0]])
13814 reg_order[0] = i;
13816 for (i = 1; i < nops; i++)
13818 int this_order = reg_order[i - 1];
13819 for (j = 0; j < nops; j++)
13820 if (regs[j] > regs[reg_order[i - 1]]
13821 && (this_order == reg_order[i - 1]
13822 || regs[j] < regs[this_order]))
13823 this_order = j;
13824 reg_order[i] = this_order;
13827 /* Ensure that registers that must be live after the instruction end
13828 up with the correct value. */
13829 for (i = 0; i < nops; i++)
13831 int this_order = reg_order[i];
13832 if ((this_order != mem_order[i]
13833 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13834 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13835 return false;
13838 /* Load the constants. */
13839 for (i = 0; i < nops; i++)
13841 rtx op = operands[2 * nops + mem_order[i]];
13842 sorted_regs[i] = regs[reg_order[i]];
13843 emit_move_insn (reg_rtxs[reg_order[i]], op);
13846 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13848 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13849 if (TARGET_THUMB1)
13851 gcc_assert (base_reg_dies);
13852 write_back = TRUE;
13855 if (stm_case == 5)
13857 gcc_assert (base_reg_dies);
13858 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13859 offset = 0;
13862 addr = plus_constant (Pmode, base_reg_rtx, offset);
13864 for (i = 0; i < nops; i++)
13866 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13867 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13868 SImode, addr, 0);
13870 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13871 write_back ? offset + i * 4 : 0));
13872 return true;
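/* An informal example of the intended transformation (hypothetical registers
   and constants): a matched sequence of the form

     mov r3, #1    str r3, [r2]
     mov r3, #2    str r3, [r2, #4]

   is rewritten, when free registers are available, so that each constant is
   loaded into its own register and ascending registers pair with ascending
   addresses, e.g. "mov r0, #1; mov r1, #2; stmia r2, {r0, r1}".  */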
13875 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13876 unaligned copies on processors which support unaligned semantics for those
13877 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13878 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13879 An interleave factor of 1 (the minimum) will perform no interleaving.
13880 Load/store multiple are used for aligned addresses where possible. */
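/* An informal example (hypothetical registers): with INTERLEAVE_FACTOR == 2
   and both ends unaligned, each 8-byte chunk is copied as

     ldr r0, [src]        @ unaligned load
     ldr r1, [src, #4]
     str r0, [dst]        @ unaligned store
     str r1, [dst, #4]

   so the second load can issue before the first store; when the source or
   destination is word-aligned the loads or stores collapse into ldm/stm.  */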
13882 static void
13883 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13884 HOST_WIDE_INT length,
13885 unsigned int interleave_factor)
13887 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13888 int *regnos = XALLOCAVEC (int, interleave_factor);
13889 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13890 HOST_WIDE_INT i, j;
13891 HOST_WIDE_INT remaining = length, words;
13892 rtx halfword_tmp = NULL, byte_tmp = NULL;
13893 rtx dst, src;
13894 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13895 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13896 HOST_WIDE_INT srcoffset, dstoffset;
13897 HOST_WIDE_INT src_autoinc, dst_autoinc;
13898 rtx mem, addr;
13900 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13902 /* Use hard registers if we have aligned source or destination so we can use
13903 load/store multiple with contiguous registers. */
13904 if (dst_aligned || src_aligned)
13905 for (i = 0; i < interleave_factor; i++)
13906 regs[i] = gen_rtx_REG (SImode, i);
13907 else
13908 for (i = 0; i < interleave_factor; i++)
13909 regs[i] = gen_reg_rtx (SImode);
13911 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13912 src = copy_addr_to_reg (XEXP (srcbase, 0));
13914 srcoffset = dstoffset = 0;
13916 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13917 For copying the last bytes we want to subtract this offset again. */
13918 src_autoinc = dst_autoinc = 0;
13920 for (i = 0; i < interleave_factor; i++)
13921 regnos[i] = i;
13923 /* Copy BLOCK_SIZE_BYTES chunks. */
13925 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13927 /* Load words. */
13928 if (src_aligned && interleave_factor > 1)
13930 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13931 TRUE, srcbase, &srcoffset));
13932 src_autoinc += UNITS_PER_WORD * interleave_factor;
13934 else
13936 for (j = 0; j < interleave_factor; j++)
13938 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13939 - src_autoinc));
13940 mem = adjust_automodify_address (srcbase, SImode, addr,
13941 srcoffset + j * UNITS_PER_WORD);
13942 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13944 srcoffset += block_size_bytes;
13947 /* Store words. */
13948 if (dst_aligned && interleave_factor > 1)
13950 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13951 TRUE, dstbase, &dstoffset));
13952 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13954 else
13956 for (j = 0; j < interleave_factor; j++)
13958 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13959 - dst_autoinc));
13960 mem = adjust_automodify_address (dstbase, SImode, addr,
13961 dstoffset + j * UNITS_PER_WORD);
13962 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13964 dstoffset += block_size_bytes;
13967 remaining -= block_size_bytes;
13970 /* Copy any whole words left (note these aren't interleaved with any
13971 subsequent halfword/byte load/stores in the interests of simplicity). */
13973 words = remaining / UNITS_PER_WORD;
13975 gcc_assert (words < interleave_factor);
13977 if (src_aligned && words > 1)
13979 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13980 &srcoffset));
13981 src_autoinc += UNITS_PER_WORD * words;
13983 else
13985 for (j = 0; j < words; j++)
13987 addr = plus_constant (Pmode, src,
13988 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13989 mem = adjust_automodify_address (srcbase, SImode, addr,
13990 srcoffset + j * UNITS_PER_WORD);
13991 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13993 srcoffset += words * UNITS_PER_WORD;
13996 if (dst_aligned && words > 1)
13998 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13999 &dstoffset));
14000 dst_autoinc += words * UNITS_PER_WORD;
14002 else
14004 for (j = 0; j < words; j++)
14006 addr = plus_constant (Pmode, dst,
14007 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14008 mem = adjust_automodify_address (dstbase, SImode, addr,
14009 dstoffset + j * UNITS_PER_WORD);
14010 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14012 dstoffset += words * UNITS_PER_WORD;
14015 remaining -= words * UNITS_PER_WORD;
14017 gcc_assert (remaining < 4);
14019 /* Copy a halfword if necessary. */
14021 if (remaining >= 2)
14023 halfword_tmp = gen_reg_rtx (SImode);
14025 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14026 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14027 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14029 /* Either write out immediately, or delay until we've loaded the last
14030 byte, depending on interleave factor. */
14031 if (interleave_factor == 1)
14033 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14034 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14035 emit_insn (gen_unaligned_storehi (mem,
14036 gen_lowpart (HImode, halfword_tmp)));
14037 halfword_tmp = NULL;
14038 dstoffset += 2;
14041 remaining -= 2;
14042 srcoffset += 2;
14045 gcc_assert (remaining < 2);
14047 /* Copy last byte. */
14049 if ((remaining & 1) != 0)
14051 byte_tmp = gen_reg_rtx (SImode);
14053 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14054 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14055 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14057 if (interleave_factor == 1)
14059 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14060 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14061 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14062 byte_tmp = NULL;
14063 dstoffset++;
14066 remaining--;
14067 srcoffset++;
14070 /* Store last halfword if we haven't done so already. */
14072 if (halfword_tmp)
14074 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14075 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14076 emit_insn (gen_unaligned_storehi (mem,
14077 gen_lowpart (HImode, halfword_tmp)));
14078 dstoffset += 2;
14081 /* Likewise for last byte. */
14083 if (byte_tmp)
14085 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14086 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14087 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14088 dstoffset++;
14091 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14094 /* From mips_adjust_block_mem:
14096 Helper function for doing a loop-based block operation on memory
14097 reference MEM. Each iteration of the loop will operate on LENGTH
14098 bytes of MEM.
14100 Create a new base register for use within the loop and point it to
14101 the start of MEM. Create a new memory reference that uses this
14102 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14104 static void
14105 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14106 rtx *loop_mem)
14108 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14110 /* Although the new mem does not refer to a known location,
14111 it does keep up to LENGTH bytes of alignment. */
14112 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14113 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14116 /* From mips_block_move_loop:
14118 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14119 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14120 the memory regions do not overlap. */
14122 static void
14123 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14124 unsigned int interleave_factor,
14125 HOST_WIDE_INT bytes_per_iter)
14127 rtx label, src_reg, dest_reg, final_src, test;
14128 HOST_WIDE_INT leftover;
14130 leftover = length % bytes_per_iter;
14131 length -= leftover;
14133 /* Create registers and memory references for use within the loop. */
14134 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14135 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14137 /* Calculate the value that SRC_REG should have after the last iteration of
14138 the loop. */
14139 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14140 0, 0, OPTAB_WIDEN);
14142 /* Emit the start of the loop. */
14143 label = gen_label_rtx ();
14144 emit_label (label);
14146 /* Emit the loop body. */
14147 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14148 interleave_factor);
14150 /* Move on to the next block. */
14151 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14152 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14154 /* Emit the loop condition. */
14155 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14156 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14158 /* Mop up any left-over bytes. */
14159 if (leftover)
14160 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
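/* An informal example (illustrative numbers): for LENGTH == 100 and
   BYTES_PER_ITER == 16 the code above copies 96 bytes with a loop of the form

     loop:  <straight 16-byte copy, interleaved>
            add   src_reg, src_reg, #16
            add   dest_reg, dest_reg, #16
            cmp   src_reg, final_src
            bne   loop

   and then emits one more straight copy for the 4 left-over bytes.  */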
14163 /* Emit a block move when either the source or destination is unaligned (not
14164 aligned to a four-byte boundary). This may need further tuning depending on
14165 core type, optimize_size setting, etc. */
14167 static int
14168 arm_movmemqi_unaligned (rtx *operands)
14170 HOST_WIDE_INT length = INTVAL (operands[2]);
14172 if (optimize_size)
14174 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14175 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14176 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14177 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14178 or dst_aligned though: allow more interleaving in those cases since the
14179 resulting code can be smaller. */
14180 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14181 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14183 if (length > 12)
14184 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14185 interleave_factor, bytes_per_iter);
14186 else
14187 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14188 interleave_factor);
14190 else
14192 /* Note that the loop created by arm_block_move_unaligned_loop may be
14193 subject to loop unrolling, which makes tuning this condition a little
14194 redundant. */
14195 if (length > 32)
14196 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14197 else
14198 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14201 return 1;
14205 arm_gen_movmemqi (rtx *operands)
14207 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14208 HOST_WIDE_INT srcoffset, dstoffset;
14209 int i;
14210 rtx src, dst, srcbase, dstbase;
14211 rtx part_bytes_reg = NULL;
14212 rtx mem;
14214 if (!CONST_INT_P (operands[2])
14215 || !CONST_INT_P (operands[3])
14216 || INTVAL (operands[2]) > 64)
14217 return 0;
14219 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14220 return arm_movmemqi_unaligned (operands);
14222 if (INTVAL (operands[3]) & 3)
14223 return 0;
14225 dstbase = operands[0];
14226 srcbase = operands[1];
14228 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14229 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14231 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14232 out_words_to_go = INTVAL (operands[2]) / 4;
14233 last_bytes = INTVAL (operands[2]) & 3;
14234 dstoffset = srcoffset = 0;
14236 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14237 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14239 for (i = 0; in_words_to_go >= 2; i+=4)
14241 if (in_words_to_go > 4)
14242 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14243 TRUE, srcbase, &srcoffset));
14244 else
14245 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14246 src, FALSE, srcbase,
14247 &srcoffset));
14249 if (out_words_to_go)
14251 if (out_words_to_go > 4)
14252 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14253 TRUE, dstbase, &dstoffset));
14254 else if (out_words_to_go != 1)
14255 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14256 out_words_to_go, dst,
14257 (last_bytes == 0
14258 ? FALSE : TRUE),
14259 dstbase, &dstoffset));
14260 else
14262 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14263 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14264 if (last_bytes != 0)
14266 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14267 dstoffset += 4;
14272 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14273 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14276 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14277 if (out_words_to_go)
14279 rtx sreg;
14281 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14282 sreg = copy_to_reg (mem);
14284 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14285 emit_move_insn (mem, sreg);
14286 in_words_to_go--;
14288 gcc_assert (!in_words_to_go); /* Sanity check */
14291 if (in_words_to_go)
14293 gcc_assert (in_words_to_go > 0);
14295 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14296 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14299 gcc_assert (!last_bytes || part_bytes_reg);
14301 if (BYTES_BIG_ENDIAN && last_bytes)
14303 rtx tmp = gen_reg_rtx (SImode);
14305 /* The bytes we want are in the top end of the word. */
14306 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14307 GEN_INT (8 * (4 - last_bytes))));
14308 part_bytes_reg = tmp;
14310 while (last_bytes)
14312 mem = adjust_automodify_address (dstbase, QImode,
14313 plus_constant (Pmode, dst,
14314 last_bytes - 1),
14315 dstoffset + last_bytes - 1);
14316 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14318 if (--last_bytes)
14320 tmp = gen_reg_rtx (SImode);
14321 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14322 part_bytes_reg = tmp;
14327 else
14329 if (last_bytes > 1)
14331 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14332 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14333 last_bytes -= 2;
14334 if (last_bytes)
14336 rtx tmp = gen_reg_rtx (SImode);
14337 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14338 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14339 part_bytes_reg = tmp;
14340 dstoffset += 2;
14344 if (last_bytes)
14346 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14347 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14351 return 1;
14354 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14355 by mode size. */
14356 inline static rtx
14357 next_consecutive_mem (rtx mem)
14359 enum machine_mode mode = GET_MODE (mem);
14360 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14361 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14363 return adjust_automodify_address (mem, mode, addr, offset);
14366 /* Copy using LDRD/STRD instructions whenever possible.
14367 Returns true upon success. */
14368 bool
14369 gen_movmem_ldrd_strd (rtx *operands)
14371 unsigned HOST_WIDE_INT len;
14372 HOST_WIDE_INT align;
14373 rtx src, dst, base;
14374 rtx reg0;
14375 bool src_aligned, dst_aligned;
14376 bool src_volatile, dst_volatile;
14378 gcc_assert (CONST_INT_P (operands[2]));
14379 gcc_assert (CONST_INT_P (operands[3]));
14381 len = UINTVAL (operands[2]);
14382 if (len > 64)
14383 return false;
14385 /* Maximum alignment we can assume for both src and dst buffers. */
14386 align = INTVAL (operands[3]);
14388 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14389 return false;
14391 /* Place src and dst addresses in registers
14392 and update the corresponding mem rtx. */
14393 dst = operands[0];
14394 dst_volatile = MEM_VOLATILE_P (dst);
14395 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14396 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14397 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14399 src = operands[1];
14400 src_volatile = MEM_VOLATILE_P (src);
14401 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14402 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14403 src = adjust_automodify_address (src, VOIDmode, base, 0);
14405 if (!unaligned_access && !(src_aligned && dst_aligned))
14406 return false;
14408 if (src_volatile || dst_volatile)
14409 return false;
14411 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14412 if (!(dst_aligned || src_aligned))
14413 return arm_gen_movmemqi (operands);
14415 src = adjust_address (src, DImode, 0);
14416 dst = adjust_address (dst, DImode, 0);
14417 while (len >= 8)
14419 len -= 8;
14420 reg0 = gen_reg_rtx (DImode);
14421 if (src_aligned)
14422 emit_move_insn (reg0, src);
14423 else
14424 emit_insn (gen_unaligned_loaddi (reg0, src));
14426 if (dst_aligned)
14427 emit_move_insn (dst, reg0);
14428 else
14429 emit_insn (gen_unaligned_storedi (dst, reg0));
14431 src = next_consecutive_mem (src);
14432 dst = next_consecutive_mem (dst);
14435 gcc_assert (len < 8);
14436 if (len >= 4)
14438 /* More than a word but less than a double-word to copy. Copy a word. */
14439 reg0 = gen_reg_rtx (SImode);
14440 src = adjust_address (src, SImode, 0);
14441 dst = adjust_address (dst, SImode, 0);
14442 if (src_aligned)
14443 emit_move_insn (reg0, src);
14444 else
14445 emit_insn (gen_unaligned_loadsi (reg0, src));
14447 if (dst_aligned)
14448 emit_move_insn (dst, reg0);
14449 else
14450 emit_insn (gen_unaligned_storesi (dst, reg0));
14452 src = next_consecutive_mem (src);
14453 dst = next_consecutive_mem (dst);
14454 len -= 4;
14457 if (len == 0)
14458 return true;
14460 /* Copy the remaining bytes. */
14461 if (len >= 2)
14463 dst = adjust_address (dst, HImode, 0);
14464 src = adjust_address (src, HImode, 0);
14465 reg0 = gen_reg_rtx (SImode);
14466 if (src_aligned)
14467 emit_insn (gen_zero_extendhisi2 (reg0, src));
14468 else
14469 emit_insn (gen_unaligned_loadhiu (reg0, src));
14471 if (dst_aligned)
14472 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
14473 else
14474 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14476 src = next_consecutive_mem (src);
14477 dst = next_consecutive_mem (dst);
14478 if (len == 2)
14479 return true;
14482 dst = adjust_address (dst, QImode, 0);
14483 src = adjust_address (src, QImode, 0);
14484 reg0 = gen_reg_rtx (QImode);
14485 emit_move_insn (reg0, src);
14486 emit_move_insn (dst, reg0);
14487 return true;
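/* An informal example (hypothetical registers): copying 14 bytes with a
   word-aligned source and destination proceeds roughly as

     ldrd r4, r5, [src]      strd r4, r5, [dst]      @ 8 bytes
     ldr  r4, [src, #8]      str  r4, [dst, #8]      @ 4 bytes
     ldrh r4, [src, #12]     strh r4, [dst, #12]     @ 2 bytes

   i.e. doubleword chunks first, then a word, halfword and byte tail as
   needed.  */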
14490 /* Select a dominance comparison mode if possible for a test of the general
14491 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14492 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14493 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14494 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14495 In all cases OP will be either EQ or NE, but we don't need to know which
14496 here. If we are unable to support a dominance comparison we return
14497 CC mode. This will then fail to match for the RTL expressions that
14498 generate this call. */
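/* An informal example: for a test such as
   (ne (ior (lt (reg a) (reg b)) (lt (reg c) (reg d))) (const_int 0))
   with COND_OR == DOM_CC_X_OR_Y, both component comparisons are LT, so
   CC_DLTmode is returned; the insn patterns can then emit a compare followed
   by a conditional compare and test the combined result with the LT
   condition.  */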
14499 enum machine_mode
14500 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14502 enum rtx_code cond1, cond2;
14503 int swapped = 0;
14505 /* Currently we will probably get the wrong result if the individual
14506 comparisons are not simple. This also ensures that it is safe to
14507 reverse a comparison if necessary. */
14508 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14509 != CCmode)
14510 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14511 != CCmode))
14512 return CCmode;
14514 /* The if_then_else variant of this tests the second condition if the
14515 first passes, but is true if the first fails. Reverse the first
14516 condition to get a true "inclusive-or" expression. */
14517 if (cond_or == DOM_CC_NX_OR_Y)
14518 cond1 = reverse_condition (cond1);
14520 /* If the comparisons are not equal, and one doesn't dominate the other,
14521 then we can't do this. */
14522 if (cond1 != cond2
14523 && !comparison_dominates_p (cond1, cond2)
14524 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14525 return CCmode;
14527 if (swapped)
14529 enum rtx_code temp = cond1;
14530 cond1 = cond2;
14531 cond2 = temp;
14534 switch (cond1)
14536 case EQ:
14537 if (cond_or == DOM_CC_X_AND_Y)
14538 return CC_DEQmode;
14540 switch (cond2)
14542 case EQ: return CC_DEQmode;
14543 case LE: return CC_DLEmode;
14544 case LEU: return CC_DLEUmode;
14545 case GE: return CC_DGEmode;
14546 case GEU: return CC_DGEUmode;
14547 default: gcc_unreachable ();
14550 case LT:
14551 if (cond_or == DOM_CC_X_AND_Y)
14552 return CC_DLTmode;
14554 switch (cond2)
14556 case LT:
14557 return CC_DLTmode;
14558 case LE:
14559 return CC_DLEmode;
14560 case NE:
14561 return CC_DNEmode;
14562 default:
14563 gcc_unreachable ();
14566 case GT:
14567 if (cond_or == DOM_CC_X_AND_Y)
14568 return CC_DGTmode;
14570 switch (cond2)
14572 case GT:
14573 return CC_DGTmode;
14574 case GE:
14575 return CC_DGEmode;
14576 case NE:
14577 return CC_DNEmode;
14578 default:
14579 gcc_unreachable ();
14582 case LTU:
14583 if (cond_or == DOM_CC_X_AND_Y)
14584 return CC_DLTUmode;
14586 switch (cond2)
14588 case LTU:
14589 return CC_DLTUmode;
14590 case LEU:
14591 return CC_DLEUmode;
14592 case NE:
14593 return CC_DNEmode;
14594 default:
14595 gcc_unreachable ();
14598 case GTU:
14599 if (cond_or == DOM_CC_X_AND_Y)
14600 return CC_DGTUmode;
14602 switch (cond2)
14604 case GTU:
14605 return CC_DGTUmode;
14606 case GEU:
14607 return CC_DGEUmode;
14608 case NE:
14609 return CC_DNEmode;
14610 default:
14611 gcc_unreachable ();
14614 /* The remaining cases only occur when both comparisons are the
14615 same. */
14616 case NE:
14617 gcc_assert (cond1 == cond2);
14618 return CC_DNEmode;
14620 case LE:
14621 gcc_assert (cond1 == cond2);
14622 return CC_DLEmode;
14624 case GE:
14625 gcc_assert (cond1 == cond2);
14626 return CC_DGEmode;
14628 case LEU:
14629 gcc_assert (cond1 == cond2);
14630 return CC_DLEUmode;
14632 case GEU:
14633 gcc_assert (cond1 == cond2);
14634 return CC_DGEUmode;
14636 default:
14637 gcc_unreachable ();
14641 enum machine_mode
14642 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14644 /* Floating point compares that must not raise an exception for a quiet NaN
14645 (equality and the unordered forms) return CCFP; the others return CCFPE. */
14646 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14648 switch (op)
14650 case EQ:
14651 case NE:
14652 case UNORDERED:
14653 case ORDERED:
14654 case UNLT:
14655 case UNLE:
14656 case UNGT:
14657 case UNGE:
14658 case UNEQ:
14659 case LTGT:
14660 return CCFPmode;
14662 case LT:
14663 case LE:
14664 case GT:
14665 case GE:
14666 return CCFPEmode;
14668 default:
14669 gcc_unreachable ();
14673 /* A compare with a shifted operand. Because of canonicalization, the
14674 comparison will have to be swapped when we emit the assembler. */
14675 if (GET_MODE (y) == SImode
14676 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14677 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14678 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14679 || GET_CODE (x) == ROTATERT))
14680 return CC_SWPmode;
14682 /* This operation is performed swapped, but since we only rely on the Z
14683 flag we don't need an additional mode. */
14684 if (GET_MODE (y) == SImode
14685 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14686 && GET_CODE (x) == NEG
14687 && (op == EQ || op == NE))
14688 return CC_Zmode;
14690 /* This is a special case that is used by combine to allow a
14691 comparison of a shifted byte load to be split into a zero-extend
14692 followed by a comparison of the shifted integer (only valid for
14693 equalities and unsigned inequalities). */
14694 if (GET_MODE (x) == SImode
14695 && GET_CODE (x) == ASHIFT
14696 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14697 && GET_CODE (XEXP (x, 0)) == SUBREG
14698 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14699 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14700 && (op == EQ || op == NE
14701 || op == GEU || op == GTU || op == LTU || op == LEU)
14702 && CONST_INT_P (y))
14703 return CC_Zmode;
14705 /* A construct for a conditional compare, if the false arm contains
14706 0, then both conditions must be true, otherwise either condition
14707 must be true. Not all conditions are possible, so CCmode is
14708 returned if it can't be done. */
14709 if (GET_CODE (x) == IF_THEN_ELSE
14710 && (XEXP (x, 2) == const0_rtx
14711 || XEXP (x, 2) == const1_rtx)
14712 && COMPARISON_P (XEXP (x, 0))
14713 && COMPARISON_P (XEXP (x, 1)))
14714 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14715 INTVAL (XEXP (x, 2)));
14717 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14718 if (GET_CODE (x) == AND
14719 && (op == EQ || op == NE)
14720 && COMPARISON_P (XEXP (x, 0))
14721 && COMPARISON_P (XEXP (x, 1)))
14722 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14723 DOM_CC_X_AND_Y);
14725 if (GET_CODE (x) == IOR
14726 && (op == EQ || op == NE)
14727 && COMPARISON_P (XEXP (x, 0))
14728 && COMPARISON_P (XEXP (x, 1)))
14729 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14730 DOM_CC_X_OR_Y);
14732 /* An operation (on Thumb) where we want to test for a single bit.
14733 This is done by shifting that bit up into the top bit of a
14734 scratch register; we can then branch on the sign bit. */
14735 if (TARGET_THUMB1
14736 && GET_MODE (x) == SImode
14737 && (op == EQ || op == NE)
14738 && GET_CODE (x) == ZERO_EXTRACT
14739 && XEXP (x, 1) == const1_rtx)
14740 return CC_Nmode;
14742 /* For an operation that sets the condition codes as a side-effect, the
14743 V flag is not set correctly, so we can only use comparisons where
14744 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14745 instead.) */
14746 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14747 if (GET_MODE (x) == SImode
14748 && y == const0_rtx
14749 && (op == EQ || op == NE || op == LT || op == GE)
14750 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14751 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14752 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14753 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14754 || GET_CODE (x) == LSHIFTRT
14755 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14756 || GET_CODE (x) == ROTATERT
14757 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14758 return CC_NOOVmode;
14760 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14761 return CC_Zmode;
14763 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14764 && GET_CODE (x) == PLUS
14765 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14766 return CC_Cmode;
14768 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14770 switch (op)
14772 case EQ:
14773 case NE:
14774 /* A DImode comparison against zero can be implemented by
14775 or'ing the two halves together. */
14776 if (y == const0_rtx)
14777 return CC_Zmode;
14779 /* We can do an equality test in three Thumb instructions. */
14780 if (!TARGET_32BIT)
14781 return CC_Zmode;
14783 /* FALLTHROUGH */
14785 case LTU:
14786 case LEU:
14787 case GTU:
14788 case GEU:
14789 /* DImode unsigned comparisons can be implemented by cmp +
14790 cmpeq without a scratch register. Not worth doing in
14791 Thumb-2. */
14792 if (TARGET_32BIT)
14793 return CC_CZmode;
14795 /* FALLTHROUGH */
14797 case LT:
14798 case LE:
14799 case GT:
14800 case GE:
14801 /* DImode signed and unsigned comparisons can be implemented
14802 by cmp + sbcs with a scratch register, but that does not
14803 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14804 gcc_assert (op != EQ && op != NE);
14805 return CC_NCVmode;
14807 default:
14808 gcc_unreachable ();
14812 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14813 return GET_MODE (x);
14815 return CCmode;
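/* Informal examples of the mode selection above: comparing (plus:SI x y)
   against y with LTU, as in the unsigned-overflow idiom "if (x + y < y)",
   selects CC_Cmode because only the carry flag is meaningful; an SImode
   compare of a shift against a register selects CC_SWPmode because the
   operands must be swapped when the assembly is output.  */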
14818 /* X and Y are two things to compare using CODE. Emit the compare insn and
14819 return the rtx for the CC register in the proper mode. SCRATCH may be needed
14820 as an extra register for some DImode comparisons (see below). */
14822 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14824 enum machine_mode mode;
14825 rtx cc_reg;
14826 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14828 /* We might have X as a constant, Y as a register because of the predicates
14829 used for cmpdi. If so, force X to a register here. */
14830 if (dimode_comparison && !REG_P (x))
14831 x = force_reg (DImode, x);
14833 mode = SELECT_CC_MODE (code, x, y);
14834 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14836 if (dimode_comparison
14837 && mode != CC_CZmode)
14839 rtx clobber, set;
14841 /* To compare two non-zero values for equality, XOR them and
14842 then compare against zero. Not used for ARM mode; there
14843 CC_CZmode is cheaper. */
14844 if (mode == CC_Zmode && y != const0_rtx)
14846 gcc_assert (!reload_completed);
14847 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14848 y = const0_rtx;
14851 /* A scratch register is required. */
14852 if (reload_completed)
14853 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14854 else
14855 scratch = gen_rtx_SCRATCH (SImode);
14857 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14858 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
14859 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14861 else
14862 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14864 return cc_reg;
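/* An informal sketch (hypothetical registers): for a DImode equality test
   "a == b" before reload, the code above rewrites the comparison as
   (a ^ b) == 0 and the resulting insns are roughly

     eor   rlo, alo, blo
     eor   rhi, ahi, bhi
     orrs  scratch, rlo, rhi    @ sets Z for the branch

   whereas unsigned DImode comparisons use CC_CZmode and a cmp/cmpeq pair
   without needing a scratch register.  */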
14867 /* Generate a sequence of insns that will generate the correct return
14868 address mask depending on the physical architecture that the program
14869 is running on. */
14871 arm_gen_return_addr_mask (void)
14873 rtx reg = gen_reg_rtx (Pmode);
14875 emit_insn (gen_return_addr_mask (reg));
14876 return reg;
14879 void
14880 arm_reload_in_hi (rtx *operands)
14882 rtx ref = operands[1];
14883 rtx base, scratch;
14884 HOST_WIDE_INT offset = 0;
14886 if (GET_CODE (ref) == SUBREG)
14888 offset = SUBREG_BYTE (ref);
14889 ref = SUBREG_REG (ref);
14892 if (REG_P (ref))
14894 /* We have a pseudo which has been spilt onto the stack; there
14895 are two cases here: the first where there is a simple
14896 stack-slot replacement and a second where the stack-slot is
14897 out of range, or is used as a subreg. */
14898 if (reg_equiv_mem (REGNO (ref)))
14900 ref = reg_equiv_mem (REGNO (ref));
14901 base = find_replacement (&XEXP (ref, 0));
14903 else
14904 /* The slot is out of range, or was dressed up in a SUBREG. */
14905 base = reg_equiv_address (REGNO (ref));
14907 else
14908 base = find_replacement (&XEXP (ref, 0));
14910 /* Handle the case where the address is too complex to be offset by 1. */
14911 if (GET_CODE (base) == MINUS
14912 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14914 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14916 emit_set_insn (base_plus, base);
14917 base = base_plus;
14919 else if (GET_CODE (base) == PLUS)
14921 /* The addend must be CONST_INT, or we would have dealt with it above. */
14922 HOST_WIDE_INT hi, lo;
14924 offset += INTVAL (XEXP (base, 1));
14925 base = XEXP (base, 0);
14927 /* Rework the address into a legal sequence of insns. */
14928 /* Valid range for lo is -4095 -> 4095 */
14929 lo = (offset >= 0
14930 ? (offset & 0xfff)
14931 : -((-offset) & 0xfff));
14933 /* Corner case, if lo is the max offset then we would be out of range
14934 once we have added the additional 1 below, so bump the msb into the
14935 pre-loading insn(s). */
14936 if (lo == 4095)
14937 lo &= 0x7ff;
14939 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14940 ^ (HOST_WIDE_INT) 0x80000000)
14941 - (HOST_WIDE_INT) 0x80000000);
14943 gcc_assert (hi + lo == offset);
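/* An informal worked example (illustrative offset): for offset == 5000,
   lo = 5000 & 0xfff = 904 and hi = 4096, so the base is adjusted with
   "add base_plus, base, #4096" and the two byte loads below use offsets
   904 and 905, both within the +/- 4095 range of ldrb.  */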
14945 if (hi != 0)
14947 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14949 /* Get the base address; addsi3 knows how to handle constants
14950 that require more than one insn. */
14951 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14952 base = base_plus;
14953 offset = lo;
14957 /* Operands[2] may overlap operands[0] (though it won't overlap
14958 operands[1]), that's why we asked for a DImode reg -- so we can
14959 use the bit that does not overlap. */
14960 if (REGNO (operands[2]) == REGNO (operands[0]))
14961 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14962 else
14963 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14965 emit_insn (gen_zero_extendqisi2 (scratch,
14966 gen_rtx_MEM (QImode,
14967 plus_constant (Pmode, base,
14968 offset))));
14969 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14970 gen_rtx_MEM (QImode,
14971 plus_constant (Pmode, base,
14972 offset + 1))));
14973 if (!BYTES_BIG_ENDIAN)
14974 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14975 gen_rtx_IOR (SImode,
14976 gen_rtx_ASHIFT
14977 (SImode,
14978 gen_rtx_SUBREG (SImode, operands[0], 0),
14979 GEN_INT (8)),
14980 scratch));
14981 else
14982 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14983 gen_rtx_IOR (SImode,
14984 gen_rtx_ASHIFT (SImode, scratch,
14985 GEN_INT (8)),
14986 gen_rtx_SUBREG (SImode, operands[0], 0)));
14989 /* Handle storing a half-word to memory during reload by synthesizing as two
14990 byte stores. Take care not to clobber the input values until after we
14991 have moved them somewhere safe. This code assumes that if the DImode
14992 scratch in operands[2] overlaps either the input value or output address
14993 in some way, then that value must die in this insn (we absolutely need
14994 two scratch registers for some corner cases). */
14995 void
14996 arm_reload_out_hi (rtx *operands)
14998 rtx ref = operands[0];
14999 rtx outval = operands[1];
15000 rtx base, scratch;
15001 HOST_WIDE_INT offset = 0;
15003 if (GET_CODE (ref) == SUBREG)
15005 offset = SUBREG_BYTE (ref);
15006 ref = SUBREG_REG (ref);
15009 if (REG_P (ref))
15011 /* We have a pseudo which has been spilt onto the stack; there
15012 are two cases here: the first where there is a simple
15013 stack-slot replacement and a second where the stack-slot is
15014 out of range, or is used as a subreg. */
15015 if (reg_equiv_mem (REGNO (ref)))
15017 ref = reg_equiv_mem (REGNO (ref));
15018 base = find_replacement (&XEXP (ref, 0));
15020 else
15021 /* The slot is out of range, or was dressed up in a SUBREG. */
15022 base = reg_equiv_address (REGNO (ref));
15024 else
15025 base = find_replacement (&XEXP (ref, 0));
15027 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15029 /* Handle the case where the address is too complex to be offset by 1. */
15030 if (GET_CODE (base) == MINUS
15031 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15033 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15035 /* Be careful not to destroy OUTVAL. */
15036 if (reg_overlap_mentioned_p (base_plus, outval))
15038 /* Updating base_plus might destroy outval, see if we can
15039 swap the scratch and base_plus. */
15040 if (!reg_overlap_mentioned_p (scratch, outval))
15042 rtx tmp = scratch;
15043 scratch = base_plus;
15044 base_plus = tmp;
15046 else
15048 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15050 /* Be conservative and copy OUTVAL into the scratch now,
15051 this should only be necessary if outval is a subreg
15052 of something larger than a word. */
15053 /* XXX Might this clobber base? I can't see how it can,
15054 since scratch is known to overlap with OUTVAL, and
15055 must be wider than a word. */
15056 emit_insn (gen_movhi (scratch_hi, outval));
15057 outval = scratch_hi;
15061 emit_set_insn (base_plus, base);
15062 base = base_plus;
15064 else if (GET_CODE (base) == PLUS)
15066 /* The addend must be CONST_INT, or we would have dealt with it above. */
15067 HOST_WIDE_INT hi, lo;
15069 offset += INTVAL (XEXP (base, 1));
15070 base = XEXP (base, 0);
15072 /* Rework the address into a legal sequence of insns. */
15073 /* Valid range for lo is -4095 -> 4095 */
15074 lo = (offset >= 0
15075 ? (offset & 0xfff)
15076 : -((-offset) & 0xfff));
15078 /* Corner case, if lo is the max offset then we would be out of range
15079 once we have added the additional 1 below, so bump the msb into the
15080 pre-loading insn(s). */
15081 if (lo == 4095)
15082 lo &= 0x7ff;
15084 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15085 ^ (HOST_WIDE_INT) 0x80000000)
15086 - (HOST_WIDE_INT) 0x80000000);
15088 gcc_assert (hi + lo == offset);
15090 if (hi != 0)
15092 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15094 /* Be careful not to destroy OUTVAL. */
15095 if (reg_overlap_mentioned_p (base_plus, outval))
15097 /* Updating base_plus might destroy outval, see if we
15098 can swap the scratch and base_plus. */
15099 if (!reg_overlap_mentioned_p (scratch, outval))
15101 rtx tmp = scratch;
15102 scratch = base_plus;
15103 base_plus = tmp;
15105 else
15107 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15109 /* Be conservative and copy outval into scratch now,
15110 this should only be necessary if outval is a
15111 subreg of something larger than a word. */
15112 /* XXX Might this clobber base? I can't see how it
15113 can, since scratch is known to overlap with
15114 outval. */
15115 emit_insn (gen_movhi (scratch_hi, outval));
15116 outval = scratch_hi;
15120 /* Get the base address; addsi3 knows how to handle constants
15121 that require more than one insn. */
15122 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15123 base = base_plus;
15124 offset = lo;
15128 if (BYTES_BIG_ENDIAN)
15130 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15131 plus_constant (Pmode, base,
15132 offset + 1)),
15133 gen_lowpart (QImode, outval)));
15134 emit_insn (gen_lshrsi3 (scratch,
15135 gen_rtx_SUBREG (SImode, outval, 0),
15136 GEN_INT (8)));
15137 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15138 offset)),
15139 gen_lowpart (QImode, scratch)));
15141 else
15143 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15144 offset)),
15145 gen_lowpart (QImode, outval)));
15146 emit_insn (gen_lshrsi3 (scratch,
15147 gen_rtx_SUBREG (SImode, outval, 0),
15148 GEN_INT (8)));
15149 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15150 plus_constant (Pmode, base,
15151 offset + 1)),
15152 gen_lowpart (QImode, scratch)));
15156 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15157 (padded to the size of a word) should be passed in a register. */
15159 static bool
15160 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15162 if (TARGET_AAPCS_BASED)
15163 return must_pass_in_stack_var_size (mode, type);
15164 else
15165 return must_pass_in_stack_var_size_or_pad (mode, type);
15169 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15170 Return true if an argument passed on the stack should be padded upwards,
15171 i.e. if the least-significant byte has useful data.
15172 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15173 aggregate types are placed in the lowest memory address. */
15175 bool
15176 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15178 if (!TARGET_AAPCS_BASED)
15179 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15181 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15182 return false;
15184 return true;
15188 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15189 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15190 register has useful data, and return the opposite if the most
15191 significant byte does. */
15193 bool
15194 arm_pad_reg_upward (enum machine_mode mode,
15195 tree type, int first ATTRIBUTE_UNUSED)
15197 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15199 /* For AAPCS, small aggregates, small fixed-point types,
15200 and small complex types are always padded upwards. */
15201 if (type)
15203 if ((AGGREGATE_TYPE_P (type)
15204 || TREE_CODE (type) == COMPLEX_TYPE
15205 || FIXED_POINT_TYPE_P (type))
15206 && int_size_in_bytes (type) <= 4)
15207 return true;
15209 else
15211 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15212 && GET_MODE_SIZE (mode) <= 4)
15213 return true;
15217 /* Otherwise, use default padding. */
15218 return !BYTES_BIG_ENDIAN;
15221 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15222 assuming that the address in the base register is word aligned. */
15223 bool
15224 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15226 HOST_WIDE_INT max_offset;
15228 /* Offset must be a multiple of 4 in Thumb mode. */
15229 if (TARGET_THUMB2 && ((offset & 3) != 0))
15230 return false;
15232 if (TARGET_THUMB2)
15233 max_offset = 1020;
15234 else if (TARGET_ARM)
15235 max_offset = 255;
15236 else
15237 return false;
15239 return ((offset <= max_offset) && (offset >= -max_offset));
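/* Informal examples: in Thumb-2 state offsets such as 1020 or -1020 are
   accepted, while 1024 (out of range) and 6 (not a multiple of four) are
   rejected; in ARM state the LDRD/STRD immediate only covers -255..255, so
   an offset of 256 is rejected.  */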
15242 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15243 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15244 Assumes that the address in the base register RN is word aligned. Pattern
15245 guarantees that both memory accesses use the same base register,
15246 the offsets are constants within the range, and the gap between the offsets is 4.
15247 If reload is complete, check that the registers are legal. WBACK indicates whether
15248 the address is updated. LOAD indicates whether the memory access is a load or a store. */
15249 bool
15250 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15251 bool wback, bool load)
15253 unsigned int t, t2, n;
15255 if (!reload_completed)
15256 return true;
15258 if (!offset_ok_for_ldrd_strd (offset))
15259 return false;
15261 t = REGNO (rt);
15262 t2 = REGNO (rt2);
15263 n = REGNO (rn);
15265 if ((TARGET_THUMB2)
15266 && ((wback && (n == t || n == t2))
15267 || (t == SP_REGNUM)
15268 || (t == PC_REGNUM)
15269 || (t2 == SP_REGNUM)
15270 || (t2 == PC_REGNUM)
15271 || (!load && (n == PC_REGNUM))
15272 || (load && (t == t2))
15273 /* Triggers Cortex-M3 LDRD errata. */
15274 || (!wback && load && fix_cm3_ldrd && (n == t))))
15275 return false;
15277 if ((TARGET_ARM)
15278 && ((wback && (n == t || n == t2))
15279 || (t2 == PC_REGNUM)
15280 || (t % 2 != 0) /* First destination register is not even. */
15281 || (t2 != t + 1)
15282 /* PC can be used as base register (for offset addressing only),
15283 but it is deprecated. */
15284 || (n == PC_REGNUM)))
15285 return false;
15287 return true;
15290 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15291 operand MEM's address contains an immediate offset from the base
15292 register and has no side effects, in which case it sets BASE and
15293 OFFSET accordingly. */
15294 static bool
15295 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15297 rtx addr;
15299 gcc_assert (base != NULL && offset != NULL);
15301 /* TODO: Handle more general memory operand patterns, such as
15302 PRE_DEC and PRE_INC. */
15304 if (side_effects_p (mem))
15305 return false;
15307 /* Can't deal with subregs. */
15308 if (GET_CODE (mem) == SUBREG)
15309 return false;
15311 gcc_assert (MEM_P (mem));
15313 *offset = const0_rtx;
15315 addr = XEXP (mem, 0);
15317 /* If addr isn't valid for DImode, then we can't handle it. */
15318 if (!arm_legitimate_address_p (DImode, addr,
15319 reload_in_progress || reload_completed))
15320 return false;
15322 if (REG_P (addr))
15324 *base = addr;
15325 return true;
15327 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15329 *base = XEXP (addr, 0);
15330 *offset = XEXP (addr, 1);
15331 return (REG_P (*base) && CONST_INT_P (*offset));
15334 return false;
15337 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15339 /* Called from a peephole2 to replace two word-size accesses with a
15340 single LDRD/STRD instruction. Returns true iff we can generate a
15341 new instruction sequence. That is, both accesses use the same base
15342 register and the gap between constant offsets is 4. This function
15343 may reorder its operands to match ldrd/strd RTL templates.
15344 OPERANDS are the operands found by the peephole matcher;
15345 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15346 corresponding memory operands. LOAD indicates whether the access
15347 is a load or a store. CONST_STORE indicates a store of constant
15348 integer values held in OPERANDS[4,5] and assumes that the pattern
15349 is four insns long, for the purpose of checking dead registers.
15350 COMMUTE indicates that register operands may be reordered. */
15351 bool
15352 gen_operands_ldrd_strd (rtx *operands, bool load,
15353 bool const_store, bool commute)
15355 int nops = 2;
15356 HOST_WIDE_INT offsets[2], offset;
15357 rtx base = NULL_RTX;
15358 rtx cur_base, cur_offset, tmp;
15359 int i, gap;
15360 HARD_REG_SET regset;
15362 gcc_assert (!const_store || !load);
15363 /* Check that the memory references are immediate offsets from the
15364 same base register. Extract the base register, the destination
15365 registers, and the corresponding memory offsets. */
15366 for (i = 0; i < nops; i++)
15368 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15369 return false;
15371 if (i == 0)
15372 base = cur_base;
15373 else if (REGNO (base) != REGNO (cur_base))
15374 return false;
15376 offsets[i] = INTVAL (cur_offset);
15377 if (GET_CODE (operands[i]) == SUBREG)
15379 tmp = SUBREG_REG (operands[i]);
15380 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15381 operands[i] = tmp;
15385 /* Make sure there is no dependency between the individual loads. */
15386 if (load && REGNO (operands[0]) == REGNO (base))
15387 return false; /* RAW */
15389 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15390 return false; /* WAW */
15392 /* If the same input register is used in both stores
15393 when storing different constants, try to find a free register.
15394 For example, the code
15395 mov r0, 0
15396 str r0, [r2]
15397 mov r0, 1
15398 str r0, [r2, #4]
15399 can be transformed into
15400 mov r1, 0
15401 strd r1, r0, [r2]
15402 in Thumb mode assuming that r1 is free. */
15403 if (const_store
15404 && REGNO (operands[0]) == REGNO (operands[1])
15405 && INTVAL (operands[4]) != INTVAL (operands[5]))
15407 if (TARGET_THUMB2)
15409 CLEAR_HARD_REG_SET (regset);
15410 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15411 if (tmp == NULL_RTX)
15412 return false;
15414 /* Use the new register in the first load to ensure that
15415 if the original input register is not dead after peephole,
15416 then it will have the correct constant value. */
15417 operands[0] = tmp;
15419 else if (TARGET_ARM)
15421 return false;
15422 int regno = REGNO (operands[0]);
15423 if (!peep2_reg_dead_p (4, operands[0]))
15425 /* When the input register is even and is not dead after the
15426 pattern, it has to hold the second constant but we cannot
15427 form a legal STRD in ARM mode with this register as the second
15428 register. */
15429 if (regno % 2 == 0)
15430 return false;
15432 /* Is regno-1 free? */
15433 SET_HARD_REG_SET (regset);
15434 CLEAR_HARD_REG_BIT(regset, regno - 1);
15435 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15436 if (tmp == NULL_RTX)
15437 return false;
15439 operands[0] = tmp;
15441 else
15443 /* Find a DImode register. */
15444 CLEAR_HARD_REG_SET (regset);
15445 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15446 if (tmp != NULL_RTX)
15448 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15449 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15451 else
15453 /* Can we use the input register to form a DI register? */
15454 SET_HARD_REG_SET (regset);
15455 CLEAR_HARD_REG_BIT(regset,
15456 regno % 2 == 0 ? regno + 1 : regno - 1);
15457 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15458 if (tmp == NULL_RTX)
15459 return false;
15460 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15464 gcc_assert (operands[0] != NULL_RTX);
15465 gcc_assert (operands[1] != NULL_RTX);
15466 gcc_assert (REGNO (operands[0]) % 2 == 0);
15467 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15471 /* Make sure the instructions are ordered with lower memory access first. */
15472 if (offsets[0] > offsets[1])
15474 gap = offsets[0] - offsets[1];
15475 offset = offsets[1];
15477 /* Swap the instructions such that lower memory is accessed first. */
15478 SWAP_RTX (operands[0], operands[1]);
15479 SWAP_RTX (operands[2], operands[3]);
15480 if (const_store)
15481 SWAP_RTX (operands[4], operands[5]);
15483 else
15485 gap = offsets[1] - offsets[0];
15486 offset = offsets[0];
15489 /* Make sure accesses are to consecutive memory locations. */
15490 if (gap != 4)
15491 return false;
15493 /* Make sure we generate legal instructions. */
15494 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15495 false, load))
15496 return true;
15498 /* In Thumb state, where registers are almost unconstrained, there
15499 is little hope to fix it. */
15500 if (TARGET_THUMB2)
15501 return false;
15503 if (load && commute)
15505 /* Try reordering registers. */
15506 SWAP_RTX (operands[0], operands[1]);
15507 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15508 false, load))
15509 return true;
15512 if (const_store)
15514 /* If input registers are dead after this pattern, they can be
15515 reordered or replaced by other registers that are free in the
15516 current pattern. */
15517 if (!peep2_reg_dead_p (4, operands[0])
15518 || !peep2_reg_dead_p (4, operands[1]))
15519 return false;
15521 /* Try to reorder the input registers. */
15522 /* For example, the code
15523 mov r0, 0
15524 mov r1, 1
15525 str r1, [r2]
15526 str r0, [r2, #4]
15527 can be transformed into
15528 mov r1, 0
15529 mov r0, 1
15530 strd r0, r1, [r2]
15532 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15533 false, false))
15535 SWAP_RTX (operands[0], operands[1]);
15536 return true;
15539 /* Try to find a free DI register. */
15540 CLEAR_HARD_REG_SET (regset);
15541 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15542 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15543 while (true)
15545 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15546 if (tmp == NULL_RTX)
15547 return false;
15549 /* DREG must be an even-numbered register in DImode.
15550 Split it into SI registers. */
15551 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15552 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15553 gcc_assert (operands[0] != NULL_RTX);
15554 gcc_assert (operands[1] != NULL_RTX);
15555 gcc_assert (REGNO (operands[0]) % 2 == 0);
15556 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15558 return (operands_ok_ldrd_strd (operands[0], operands[1],
15559 base, offset,
15560 false, load));
15564 return false;
15566 #undef SWAP_RTX
15571 /* Print a symbolic form of X to the debug file, F. */
15572 static void
15573 arm_print_value (FILE *f, rtx x)
15575 switch (GET_CODE (x))
15577 case CONST_INT:
15578 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15579 return;
15581 case CONST_DOUBLE:
15582 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15583 return;
15585 case CONST_VECTOR:
15587 int i;
15589 fprintf (f, "<");
15590 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15592 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15593 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15594 fputc (',', f);
15596 fprintf (f, ">");
15598 return;
15600 case CONST_STRING:
15601 fprintf (f, "\"%s\"", XSTR (x, 0));
15602 return;
15604 case SYMBOL_REF:
15605 fprintf (f, "`%s'", XSTR (x, 0));
15606 return;
15608 case LABEL_REF:
15609 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15610 return;
15612 case CONST:
15613 arm_print_value (f, XEXP (x, 0));
15614 return;
15616 case PLUS:
15617 arm_print_value (f, XEXP (x, 0));
15618 fprintf (f, "+");
15619 arm_print_value (f, XEXP (x, 1));
15620 return;
15622 case PC:
15623 fprintf (f, "pc");
15624 return;
15626 default:
15627 fprintf (f, "????");
15628 return;
15632 /* Routines for manipulation of the constant pool. */
15634 /* Arm instructions cannot load a large constant directly into a
15635 register; they have to come from a pc relative load. The constant
15636 must therefore be placed in the addressable range of the pc
15637 relative load. Depending on the precise pc relative load
15638 instruction the range is somewhere between 256 bytes and 4k. This
15639 means that we often have to dump a constant inside a function, and
15640 generate code to branch around it.
15642 It is important to minimize this, since the branches will slow
15643 things down and make the code larger.
15645 Normally we can hide the table after an existing unconditional
15646 branch so that there is no interruption of the flow, but in the
15647 worst case the code looks like this:
15649 ldr rn, L1
15651 b L2
15652 align
15653 L1: .long value
15657 ldr rn, L3
15659 b L4
15660 align
15661 L3: .long value
15665 We fix this by performing a scan after scheduling, which notices
15666 which instructions need to have their operands fetched from the
15667 constant table and builds the table.
15669 The algorithm starts by building a table of all the constants that
15670 need fixing up and all the natural barriers in the function (places
15671 where a constant table can be dropped without breaking the flow).
15672 For each fixup we note how far the pc-relative replacement will be
15673 able to reach and the offset of the instruction into the function.
15675 Having built the table we then group the fixes together to form
15676 tables that are as large as possible (subject to addressing
15677 constraints) and emit each table of constants after the last
15678 barrier that is within range of all the instructions in the group.
15679 If a group does not contain a barrier, then we forcibly create one
15680 by inserting a jump instruction into the flow. Once the table has
15681 been inserted, the insns are then modified to reference the
15682 relevant entry in the pool.
15684 Possible enhancements to the algorithm (not implemented) are:
15686 1) For some processors and object formats, there may be benefit in
15687 aligning the pools to the start of cache lines; this alignment
15688 would need to be taken into account when calculating addressability
15689 of a pool. */
15691 /* These typedefs are located at the start of this file, so that
15692 they can be used in the prototypes there. This comment is to
15693 remind readers of that fact so that the following structures
15694 can be understood more easily.
15696 typedef struct minipool_node Mnode;
15697 typedef struct minipool_fixup Mfix; */
15699 struct minipool_node
15701 /* Doubly linked chain of entries. */
15702 Mnode * next;
15703 Mnode * prev;
15704 /* The maximum offset into the code that this entry can be placed. While
15705 pushing fixes for forward references, all entries are sorted in order
15706 of increasing max_address. */
15707 HOST_WIDE_INT max_address;
15708 /* Similarly for an entry inserted for a backwards ref. */
15709 HOST_WIDE_INT min_address;
15710 /* The number of fixes referencing this entry. This can become zero
15711 if we "unpush" an entry. In this case we ignore the entry when we
15712 come to emit the code. */
15713 int refcount;
15714 /* The offset from the start of the minipool. */
15715 HOST_WIDE_INT offset;
15716 /* The value in the table. */
15717 rtx value;
15718 /* The mode of value. */
15719 enum machine_mode mode;
15720 /* The size of the value. With iWMMXt enabled
15721 sizes > 4 also imply an alignment of 8-bytes. */
15722 int fix_size;
15725 struct minipool_fixup
15727 Mfix * next;
15728 rtx insn;
15729 HOST_WIDE_INT address;
15730 rtx * loc;
15731 enum machine_mode mode;
15732 int fix_size;
15733 rtx value;
15734 Mnode * minipool;
15735 HOST_WIDE_INT forwards;
15736 HOST_WIDE_INT backwards;
15739 /* Fixes less than a word need padding out to a word boundary. */
15740 #define MINIPOOL_FIX_SIZE(mode) \
15741 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
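/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) evaluates to 8.  */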
15743 static Mnode * minipool_vector_head;
15744 static Mnode * minipool_vector_tail;
15745 static rtx minipool_vector_label;
15746 static int minipool_pad;
15748 /* The linked list of all minipool fixes required for this function. */
15749 Mfix * minipool_fix_head;
15750 Mfix * minipool_fix_tail;
15751 /* The fix entry for the current minipool, once it has been placed. */
15752 Mfix * minipool_barrier;
15754 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15755 #define JUMP_TABLES_IN_TEXT_SECTION 0
15756 #endif
15758 static HOST_WIDE_INT
15759 get_jump_table_size (rtx insn)
15761 /* ADDR_VECs only take room if read-only data goes into the text
15762 section. */
15763 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15765 rtx body = PATTERN (insn);
15766 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15767 HOST_WIDE_INT size;
15768 HOST_WIDE_INT modesize;
15770 modesize = GET_MODE_SIZE (GET_MODE (body));
15771 size = modesize * XVECLEN (body, elt);
15772 switch (modesize)
15774 case 1:
15775 /* Round up size of TBB table to a halfword boundary. */
15776 size = (size + 1) & ~(HOST_WIDE_INT)1;
15777 break;
15778 case 2:
15779 /* No padding necessary for TBH. */
15780 break;
15781 case 4:
15782 /* Add two bytes for alignment on Thumb. */
15783 if (TARGET_THUMB)
15784 size += 2;
15785 break;
15786 default:
15787 gcc_unreachable ();
15789 return size;
15792 return 0;
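/* Illustrative numbers: a QImode ADDR_DIFF_VEC (a TBB table) with five
   entries occupies 5 bytes and is rounded up to 6, while an SImode table
   with five entries occupies 20 bytes, plus 2 bytes of alignment padding
   on Thumb.  */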
15795 /* Return the maximum amount of padding that will be inserted before
15796 label LABEL. */
15798 static HOST_WIDE_INT
15799 get_label_padding (rtx label)
15801 HOST_WIDE_INT align, min_insn_size;
15803 align = 1 << label_to_alignment (label);
15804 min_insn_size = TARGET_THUMB ? 2 : 4;
15805 return align > min_insn_size ? align - min_insn_size : 0;
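/* Worked example (illustrative): a label aligned to 1 << 3 = 8 bytes in
   Thumb code (minimum insn size 2) can be preceded by at most 8 - 2 = 6
   bytes of padding; with 4-byte alignment in ARM code the result is 0.  */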
15808 /* Move a minipool fix MP from its current location to before MAX_MP.
15809 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15810 constraints may need updating. */
15811 static Mnode *
15812 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15813 HOST_WIDE_INT max_address)
15815 /* The code below assumes these are different. */
15816 gcc_assert (mp != max_mp);
15818 if (max_mp == NULL)
15820 if (max_address < mp->max_address)
15821 mp->max_address = max_address;
15823 else
15825 if (max_address > max_mp->max_address - mp->fix_size)
15826 mp->max_address = max_mp->max_address - mp->fix_size;
15827 else
15828 mp->max_address = max_address;
15830 /* Unlink MP from its current position. Since max_mp is non-null,
15831 mp->prev must be non-null. */
15832 mp->prev->next = mp->next;
15833 if (mp->next != NULL)
15834 mp->next->prev = mp->prev;
15835 else
15836 minipool_vector_tail = mp->prev;
15838 /* Re-insert it before MAX_MP. */
15839 mp->next = max_mp;
15840 mp->prev = max_mp->prev;
15841 max_mp->prev = mp;
15843 if (mp->prev != NULL)
15844 mp->prev->next = mp;
15845 else
15846 minipool_vector_head = mp;
15849 /* Save the new entry. */
15850 max_mp = mp;
15852 /* Scan over the preceding entries and adjust their addresses as
15853 required. */
15854 while (mp->prev != NULL
15855 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15857 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15858 mp = mp->prev;
15861 return max_mp;
15864 /* Add a constant to the minipool for a forward reference. Returns the
15865 node added or NULL if the constant will not fit in this pool. */
15866 static Mnode *
15867 add_minipool_forward_ref (Mfix *fix)
15869 /* If set, max_mp is the first pool_entry that has a lower
15870 constraint than the one we are trying to add. */
15871 Mnode * max_mp = NULL;
15872 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15873 Mnode * mp;
15875 /* If the minipool starts before the end of FIX->INSN then this FIX
15876 can not be placed into the current pool. Furthermore, adding the
15877 new constant pool entry may cause the pool to start FIX_SIZE bytes
15878 earlier. */
15879 if (minipool_vector_head &&
15880 (fix->address + get_attr_length (fix->insn)
15881 >= minipool_vector_head->max_address - fix->fix_size))
15882 return NULL;
15884 /* Scan the pool to see if a constant with the same value has
15885 already been added. While we are doing this, also note the
15886 location where we must insert the constant if it doesn't already
15887 exist. */
15888 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15890 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15891 && fix->mode == mp->mode
15892 && (!LABEL_P (fix->value)
15893 || (CODE_LABEL_NUMBER (fix->value)
15894 == CODE_LABEL_NUMBER (mp->value)))
15895 && rtx_equal_p (fix->value, mp->value))
15897 /* More than one fix references this entry. */
15898 mp->refcount++;
15899 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15902 /* Note the insertion point if necessary. */
15903 if (max_mp == NULL
15904 && mp->max_address > max_address)
15905 max_mp = mp;
15907 /* If we are inserting an 8-byte aligned quantity and
15908 we have not already found an insertion point, then
15909 make sure that all such 8-byte aligned quantities are
15910 placed at the start of the pool. */
15911 if (ARM_DOUBLEWORD_ALIGN
15912 && max_mp == NULL
15913 && fix->fix_size >= 8
15914 && mp->fix_size < 8)
15916 max_mp = mp;
15917 max_address = mp->max_address;
15921 /* The value is not currently in the minipool, so we need to create
15922 a new entry for it. If MAX_MP is NULL, the entry will be put on
15923 the end of the list since the placement is less constrained than
15924 any existing entry. Otherwise, we insert the new fix before
15925 MAX_MP and, if necessary, adjust the constraints on the other
15926 entries. */
15927 mp = XNEW (Mnode);
15928 mp->fix_size = fix->fix_size;
15929 mp->mode = fix->mode;
15930 mp->value = fix->value;
15931 mp->refcount = 1;
15932 /* Not yet required for a backwards ref. */
15933 mp->min_address = -65536;
15935 if (max_mp == NULL)
15937 mp->max_address = max_address;
15938 mp->next = NULL;
15939 mp->prev = minipool_vector_tail;
15941 if (mp->prev == NULL)
15943 minipool_vector_head = mp;
15944 minipool_vector_label = gen_label_rtx ();
15946 else
15947 mp->prev->next = mp;
15949 minipool_vector_tail = mp;
15951 else
15953 if (max_address > max_mp->max_address - mp->fix_size)
15954 mp->max_address = max_mp->max_address - mp->fix_size;
15955 else
15956 mp->max_address = max_address;
15958 mp->next = max_mp;
15959 mp->prev = max_mp->prev;
15960 max_mp->prev = mp;
15961 if (mp->prev != NULL)
15962 mp->prev->next = mp;
15963 else
15964 minipool_vector_head = mp;
15967 /* Save the new entry. */
15968 max_mp = mp;
15970 /* Scan over the preceding entries and adjust their addresses as
15971 required. */
15972 while (mp->prev != NULL
15973 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15975 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15976 mp = mp->prev;
15979 return max_mp;
15982 static Mnode *
15983 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15984 HOST_WIDE_INT min_address)
15986 HOST_WIDE_INT offset;
15988 /* The code below assumes these are different. */
15989 gcc_assert (mp != min_mp);
15991 if (min_mp == NULL)
15993 if (min_address > mp->min_address)
15994 mp->min_address = min_address;
15996 else
15998 /* We will adjust this below if it is too loose. */
15999 mp->min_address = min_address;
16001 /* Unlink MP from its current position. Since min_mp is non-null,
16002 mp->next must be non-null. */
16003 mp->next->prev = mp->prev;
16004 if (mp->prev != NULL)
16005 mp->prev->next = mp->next;
16006 else
16007 minipool_vector_head = mp->next;
16009 /* Reinsert it after MIN_MP. */
16010 mp->prev = min_mp;
16011 mp->next = min_mp->next;
16012 min_mp->next = mp;
16013 if (mp->next != NULL)
16014 mp->next->prev = mp;
16015 else
16016 minipool_vector_tail = mp;
16019 min_mp = mp;
16021 offset = 0;
16022 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16024 mp->offset = offset;
16025 if (mp->refcount > 0)
16026 offset += mp->fix_size;
16028 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16029 mp->next->min_address = mp->min_address + mp->fix_size;
16032 return min_mp;
16035 /* Add a constant to the minipool for a backward reference. Returns the
16036 node added or NULL if the constant will not fit in this pool.
16038 Note that the code for insertion for a backwards reference can be
16039 somewhat confusing because the calculated offsets for each fix do
16040 not take into account the size of the pool (which is still under
16041 construction). */
16042 static Mnode *
16043 add_minipool_backward_ref (Mfix *fix)
16045 /* If set, min_mp is the last pool_entry that has a lower constraint
16046 than the one we are trying to add. */
16047 Mnode *min_mp = NULL;
16048 /* This can be negative, since it is only a constraint. */
16049 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16050 Mnode *mp;
16052 /* If we can't reach the current pool from this insn, or if we can't
16053 insert this entry at the end of the pool without pushing other
16054 fixes out of range, then we don't try. This ensures that we
16055 can't fail later on. */
16056 if (min_address >= minipool_barrier->address
16057 || (minipool_vector_tail->min_address + fix->fix_size
16058 >= minipool_barrier->address))
16059 return NULL;
16061 /* Scan the pool to see if a constant with the same value has
16062 already been added. While we are doing this, also note the
16063 location where we must insert the constant if it doesn't already
16064 exist. */
16065 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16067 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16068 && fix->mode == mp->mode
16069 && (!LABEL_P (fix->value)
16070 || (CODE_LABEL_NUMBER (fix->value)
16071 == CODE_LABEL_NUMBER (mp->value)))
16072 && rtx_equal_p (fix->value, mp->value)
16073 /* Check that there is enough slack to move this entry to the
16074 end of the table (this is conservative). */
16075 && (mp->max_address
16076 > (minipool_barrier->address
16077 + minipool_vector_tail->offset
16078 + minipool_vector_tail->fix_size)))
16080 mp->refcount++;
16081 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16084 if (min_mp != NULL)
16085 mp->min_address += fix->fix_size;
16086 else
16088 /* Note the insertion point if necessary. */
16089 if (mp->min_address < min_address)
16091 /* For now, we do not allow the insertion of 8-byte alignment
16092 requiring nodes anywhere but at the start of the pool. */
16093 if (ARM_DOUBLEWORD_ALIGN
16094 && fix->fix_size >= 8 && mp->fix_size < 8)
16095 return NULL;
16096 else
16097 min_mp = mp;
16099 else if (mp->max_address
16100 < minipool_barrier->address + mp->offset + fix->fix_size)
16102 /* Inserting before this entry would push the fix beyond
16103 its maximum address (which can happen if we have
16104 re-located a forwards fix); force the new fix to come
16105 after it. */
16106 if (ARM_DOUBLEWORD_ALIGN
16107 && fix->fix_size >= 8 && mp->fix_size < 8)
16108 return NULL;
16109 else
16111 min_mp = mp;
16112 min_address = mp->min_address + fix->fix_size;
16115 /* Do not insert a non-8-byte aligned quantity before 8-byte
16116 aligned quantities. */
16117 else if (ARM_DOUBLEWORD_ALIGN
16118 && fix->fix_size < 8
16119 && mp->fix_size >= 8)
16121 min_mp = mp;
16122 min_address = mp->min_address + fix->fix_size;
16127 /* We need to create a new entry. */
16128 mp = XNEW (Mnode);
16129 mp->fix_size = fix->fix_size;
16130 mp->mode = fix->mode;
16131 mp->value = fix->value;
16132 mp->refcount = 1;
16133 mp->max_address = minipool_barrier->address + 65536;
16135 mp->min_address = min_address;
16137 if (min_mp == NULL)
16139 mp->prev = NULL;
16140 mp->next = minipool_vector_head;
16142 if (mp->next == NULL)
16144 minipool_vector_tail = mp;
16145 minipool_vector_label = gen_label_rtx ();
16147 else
16148 mp->next->prev = mp;
16150 minipool_vector_head = mp;
16152 else
16154 mp->next = min_mp->next;
16155 mp->prev = min_mp;
16156 min_mp->next = mp;
16158 if (mp->next != NULL)
16159 mp->next->prev = mp;
16160 else
16161 minipool_vector_tail = mp;
16164 /* Save the new entry. */
16165 min_mp = mp;
16167 if (mp->prev)
16168 mp = mp->prev;
16169 else
16170 mp->offset = 0;
16172 /* Scan over the following entries and adjust their offsets. */
16173 while (mp->next != NULL)
16175 if (mp->next->min_address < mp->min_address + mp->fix_size)
16176 mp->next->min_address = mp->min_address + mp->fix_size;
16178 if (mp->refcount)
16179 mp->next->offset = mp->offset + mp->fix_size;
16180 else
16181 mp->next->offset = mp->offset;
16183 mp = mp->next;
16186 return min_mp;
16189 static void
16190 assign_minipool_offsets (Mfix *barrier)
16192 HOST_WIDE_INT offset = 0;
16193 Mnode *mp;
16195 minipool_barrier = barrier;
16197 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16199 mp->offset = offset;
16201 if (mp->refcount > 0)
16202 offset += mp->fix_size;
16206 /* Output the literal table */
16207 static void
16208 dump_minipool (rtx scan)
16210 Mnode * mp;
16211 Mnode * nmp;
16212 int align64 = 0;
16214 if (ARM_DOUBLEWORD_ALIGN)
16215 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16216 if (mp->refcount > 0 && mp->fix_size >= 8)
16218 align64 = 1;
16219 break;
16222 if (dump_file)
16223 fprintf (dump_file,
16224 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16225 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16227 scan = emit_label_after (gen_label_rtx (), scan);
16228 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16229 scan = emit_label_after (minipool_vector_label, scan);
16231 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16233 if (mp->refcount > 0)
16235 if (dump_file)
16237 fprintf (dump_file,
16238 ";; Offset %u, min %ld, max %ld ",
16239 (unsigned) mp->offset, (unsigned long) mp->min_address,
16240 (unsigned long) mp->max_address);
16241 arm_print_value (dump_file, mp->value);
16242 fputc ('\n', dump_file);
16245 switch (mp->fix_size)
16247 #ifdef HAVE_consttable_1
16248 case 1:
16249 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16250 break;
16252 #endif
16253 #ifdef HAVE_consttable_2
16254 case 2:
16255 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16256 break;
16258 #endif
16259 #ifdef HAVE_consttable_4
16260 case 4:
16261 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16262 break;
16264 #endif
16265 #ifdef HAVE_consttable_8
16266 case 8:
16267 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16268 break;
16270 #endif
16271 #ifdef HAVE_consttable_16
16272 case 16:
16273 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16274 break;
16276 #endif
16277 default:
16278 gcc_unreachable ();
16282 nmp = mp->next;
16283 free (mp);
16286 minipool_vector_head = minipool_vector_tail = NULL;
16287 scan = emit_insn_after (gen_consttable_end (), scan);
16288 scan = emit_barrier_after (scan);
16291 /* Return the cost of forcibly inserting a barrier after INSN. */
16292 static int
16293 arm_barrier_cost (rtx insn)
16295 /* Basing the location of the pool on the loop depth is preferable,
16296 but at the moment, the basic block information seems to be
16297 corrupt by this stage of the compilation. */
16298 int base_cost = 50;
16299 rtx next = next_nonnote_insn (insn);
16301 if (next != NULL && LABEL_P (next))
16302 base_cost -= 20;
16304 switch (GET_CODE (insn))
16306 case CODE_LABEL:
16307 /* It will always be better to place the table before the label, rather
16308 than after it. */
16309 return 50;
16311 case INSN:
16312 case CALL_INSN:
16313 return base_cost;
16315 case JUMP_INSN:
16316 return base_cost - 10;
16318 default:
16319 return base_cost + 10;
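/* Illustrative costs: a JUMP_INSN that is immediately followed by a label
   scores 50 - 20 - 10 = 20, a plain INSN in straight-line code scores 50,
   and a CODE_LABEL always scores 50, so barriers gravitate towards
   existing jumps and away from labels.  */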
16323 /* Find the best place in the insn stream in the range
16324 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16325 Create the barrier by inserting a jump and add a new fix entry for
16326 it. */
16327 static Mfix *
16328 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16330 HOST_WIDE_INT count = 0;
16331 rtx barrier;
16332 rtx from = fix->insn;
16333 /* The instruction after which we will insert the jump. */
16334 rtx selected = NULL;
16335 int selected_cost;
16336 /* The address at which the jump instruction will be placed. */
16337 HOST_WIDE_INT selected_address;
16338 Mfix * new_fix;
16339 HOST_WIDE_INT max_count = max_address - fix->address;
16340 rtx label = gen_label_rtx ();
16342 selected_cost = arm_barrier_cost (from);
16343 selected_address = fix->address;
16345 while (from && count < max_count)
16347 rtx tmp;
16348 int new_cost;
16350 /* This code shouldn't have been called if there was a natural barrier
16351 within range. */
16352 gcc_assert (!BARRIER_P (from));
16354 /* Count the length of this insn. This must stay in sync with the
16355 code that pushes minipool fixes. */
16356 if (LABEL_P (from))
16357 count += get_label_padding (from);
16358 else
16359 count += get_attr_length (from);
16361 /* If there is a jump table, add its length. */
16362 if (tablejump_p (from, NULL, &tmp))
16364 count += get_jump_table_size (tmp);
16366 /* Jump tables aren't in a basic block, so base the cost on
16367 the dispatch insn. If we select this location, we will
16368 still put the pool after the table. */
16369 new_cost = arm_barrier_cost (from);
16371 if (count < max_count
16372 && (!selected || new_cost <= selected_cost))
16374 selected = tmp;
16375 selected_cost = new_cost;
16376 selected_address = fix->address + count;
16379 /* Continue after the dispatch table. */
16380 from = NEXT_INSN (tmp);
16381 continue;
16384 new_cost = arm_barrier_cost (from);
16386 if (count < max_count
16387 && (!selected || new_cost <= selected_cost))
16389 selected = from;
16390 selected_cost = new_cost;
16391 selected_address = fix->address + count;
16394 from = NEXT_INSN (from);
16397 /* Make sure that we found a place to insert the jump. */
16398 gcc_assert (selected);
16400 /* Make sure we do not split a call and its corresponding
16401 CALL_ARG_LOCATION note. */
16402 if (CALL_P (selected))
16404 rtx next = NEXT_INSN (selected);
16405 if (next && NOTE_P (next)
16406 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16407 selected = next;
16410 /* Create a new JUMP_INSN that branches around a barrier. */
16411 from = emit_jump_insn_after (gen_jump (label), selected);
16412 JUMP_LABEL (from) = label;
16413 barrier = emit_barrier_after (from);
16414 emit_label_after (label, barrier);
16416 /* Create a minipool barrier entry for the new barrier. */
16417 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16418 new_fix->insn = barrier;
16419 new_fix->address = selected_address;
16420 new_fix->next = fix->next;
16421 fix->next = new_fix;
16423 return new_fix;
16426 /* Record that there is a natural barrier in the insn stream at
16427 ADDRESS. */
16428 static void
16429 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16431 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16433 fix->insn = insn;
16434 fix->address = address;
16436 fix->next = NULL;
16437 if (minipool_fix_head != NULL)
16438 minipool_fix_tail->next = fix;
16439 else
16440 minipool_fix_head = fix;
16442 minipool_fix_tail = fix;
16445 /* Record INSN, which will need fixing up to load a value from the
16446 minipool. ADDRESS is the offset of the insn since the start of the
16447 function; LOC is a pointer to the part of the insn which requires
16448 fixing; VALUE is the constant that must be loaded, which is of type
16449 MODE. */
16450 static void
16451 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16452 enum machine_mode mode, rtx value)
16454 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16456 fix->insn = insn;
16457 fix->address = address;
16458 fix->loc = loc;
16459 fix->mode = mode;
16460 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16461 fix->value = value;
16462 fix->forwards = get_attr_pool_range (insn);
16463 fix->backwards = get_attr_neg_pool_range (insn);
16464 fix->minipool = NULL;
16466 /* If an insn doesn't have a range defined for it, then it isn't
16467 expecting to be reworked by this code. Better to stop now than
16468 to generate duff assembly code. */
16469 gcc_assert (fix->forwards || fix->backwards);
16471 /* If an entry requires 8-byte alignment then assume all constant pools
16472 require 4 bytes of padding. Trying to do this later on a per-pool
16473 basis is awkward because existing pool entries have to be modified. */
16474 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16475 minipool_pad = 4;
16477 if (dump_file)
16479 fprintf (dump_file,
16480 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16481 GET_MODE_NAME (mode),
16482 INSN_UID (insn), (unsigned long) address,
16483 -1 * (long)fix->backwards, (long)fix->forwards);
16484 arm_print_value (dump_file, fix->value);
16485 fprintf (dump_file, "\n");
16488 /* Add it to the chain of fixes. */
16489 fix->next = NULL;
16491 if (minipool_fix_head != NULL)
16492 minipool_fix_tail->next = fix;
16493 else
16494 minipool_fix_head = fix;
16496 minipool_fix_tail = fix;
16499 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16500 Returns the number of insns needed, or 99 if we always want to synthesize
16501 the value. */
16502 int
16503 arm_max_const_double_inline_cost ()
16505 /* Let the value get synthesized to avoid the use of literal pools. */
16506 if (arm_disable_literal_pool)
16507 return 99;
16509 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16512 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16513 Returns the number of insns needed, or 99 if we don't know how to
16514 do it. */
16515 int
16516 arm_const_double_inline_cost (rtx val)
16518 rtx lowpart, highpart;
16519 enum machine_mode mode;
16521 mode = GET_MODE (val);
16523 if (mode == VOIDmode)
16524 mode = DImode;
16526 gcc_assert (GET_MODE_SIZE (mode) == 8);
16528 lowpart = gen_lowpart (SImode, val);
16529 highpart = gen_highpart_mode (SImode, mode, val);
16531 gcc_assert (CONST_INT_P (lowpart));
16532 gcc_assert (CONST_INT_P (highpart));
16534 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16535 NULL_RTX, NULL_RTX, 0, 0)
16536 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16537 NULL_RTX, NULL_RTX, 0, 0));
16540 /* Return true if it is worthwhile to split a 64-bit constant into two
16541 32-bit operations. This is the case if optimizing for size, or
16542 if we have load delay slots, or if one 32-bit part can be done with
16543 a single data operation. */
16544 bool
16545 arm_const_double_by_parts (rtx val)
16547 enum machine_mode mode = GET_MODE (val);
16548 rtx part;
16550 if (optimize_size || arm_ld_sched)
16551 return true;
16553 if (mode == VOIDmode)
16554 mode = DImode;
16556 part = gen_highpart_mode (SImode, mode, val);
16558 gcc_assert (CONST_INT_P (part));
16560 if (const_ok_for_arm (INTVAL (part))
16561 || const_ok_for_arm (~INTVAL (part)))
16562 return true;
16564 part = gen_lowpart (SImode, val);
16566 gcc_assert (CONST_INT_P (part));
16568 if (const_ok_for_arm (INTVAL (part))
16569 || const_ok_for_arm (~INTVAL (part)))
16570 return true;
16572 return false;
16575 /* Return true if it is possible to inline both the high and low parts
16576 of a 64-bit constant into 32-bit data processing instructions. */
16577 bool
16578 arm_const_double_by_immediates (rtx val)
16580 enum machine_mode mode = GET_MODE (val);
16581 rtx part;
16583 if (mode == VOIDmode)
16584 mode = DImode;
16586 part = gen_highpart_mode (SImode, mode, val);
16588 gcc_assert (CONST_INT_P (part));
16590 if (!const_ok_for_arm (INTVAL (part)))
16591 return false;
16593 part = gen_lowpart (SImode, val);
16595 gcc_assert (CONST_INT_P (part));
16597 if (!const_ok_for_arm (INTVAL (part)))
16598 return false;
16600 return true;
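/* For example (illustrative values), the 64-bit constant
   0x000000ff000000ff can be built purely from immediates because both
   32-bit halves are valid ARM data-processing immediates, whereas
   0x12345678deadbeef cannot, since neither half is encodable as a
   rotated 8-bit constant.  */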
16603 /* Scan INSN and note any of its operands that need fixing.
16604 If DO_PUSHES is false we do not actually push any of the fixups
16605 needed. */
16606 static void
16607 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16609 int opno;
16611 extract_insn (insn);
16613 if (!constrain_operands (1))
16614 fatal_insn_not_found (insn);
16616 if (recog_data.n_alternatives == 0)
16617 return;
16619 /* Fill in recog_op_alt with information about the constraints of
16620 this insn. */
16621 preprocess_constraints ();
16623 for (opno = 0; opno < recog_data.n_operands; opno++)
16625 /* Things we need to fix can only occur in inputs. */
16626 if (recog_data.operand_type[opno] != OP_IN)
16627 continue;
16629 /* If this alternative is a memory reference, then any mention
16630 of constants in this alternative is really to fool reload
16631 into allowing us to accept one there. We need to fix them up
16632 now so that we output the right code. */
16633 if (recog_op_alt[opno][which_alternative].memory_ok)
16635 rtx op = recog_data.operand[opno];
16637 if (CONSTANT_P (op))
16639 if (do_pushes)
16640 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16641 recog_data.operand_mode[opno], op);
16643 else if (MEM_P (op)
16644 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16645 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16647 if (do_pushes)
16649 rtx cop = avoid_constant_pool_reference (op);
16651 /* Casting the address of something to a mode narrower
16652 than a word can cause avoid_constant_pool_reference()
16653 to return the pool reference itself. That's no good to
16654 us here. Let's just hope that we can use the
16655 constant pool value directly. */
16656 if (op == cop)
16657 cop = get_pool_constant (XEXP (op, 0));
16659 push_minipool_fix (insn, address,
16660 recog_data.operand_loc[opno],
16661 recog_data.operand_mode[opno], cop);
16668 return;
16671 /* Rewrite move insn into subtract of 0 if the condition codes will
16672 be useful in the next conditional jump insn. */
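/* The intent, roughly (register choice is illustrative), is that
	movs	r1, r0
	cmp	r1, #0
	beq	.L2
   can drop the explicit compare once the move is rewritten as
	subs	r1, r0, #0
   because the subtract already sets the flags the branch needs.  */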
16674 static void
16675 thumb1_reorg (void)
16677 basic_block bb;
16679 FOR_EACH_BB_FN (bb, cfun)
16681 rtx dest, src;
16682 rtx pat, op0, set = NULL;
16683 rtx prev, insn = BB_END (bb);
16684 bool insn_clobbered = false;
16686 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
16687 insn = PREV_INSN (insn);
16689 /* Find the last cbranchsi4_insn in basic block BB. */
16690 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16691 continue;
16693 /* Get the register with which we are comparing. */
16694 pat = PATTERN (insn);
16695 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16697 /* Find the first flag setting insn before INSN in basic block BB. */
16698 gcc_assert (insn != BB_HEAD (bb));
16699 for (prev = PREV_INSN (insn);
16700 (!insn_clobbered
16701 && prev != BB_HEAD (bb)
16702 && (NOTE_P (prev)
16703 || DEBUG_INSN_P (prev)
16704 || ((set = single_set (prev)) != NULL
16705 && get_attr_conds (prev) == CONDS_NOCOND)));
16706 prev = PREV_INSN (prev))
16708 if (reg_set_p (op0, prev))
16709 insn_clobbered = true;
16712 /* Skip if op0 is clobbered by insn other than prev. */
16713 if (insn_clobbered)
16714 continue;
16716 if (!set)
16717 continue;
16719 dest = SET_DEST (set);
16720 src = SET_SRC (set);
16721 if (!low_register_operand (dest, SImode)
16722 || !low_register_operand (src, SImode))
16723 continue;
16725 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16726 in INSN. Both src and dest of the move insn are checked. */
16727 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16729 dest = copy_rtx (dest);
16730 src = copy_rtx (src);
16731 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16732 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16733 INSN_CODE (prev) = -1;
16734 /* Set test register in INSN to dest. */
16735 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16736 INSN_CODE (insn) = -1;
16741 /* Convert instructions to their cc-clobbering variant if possible, since
16742 that allows us to use smaller encodings. */
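/* For instance, a 32-bit Thumb-2 instruction such as
	add	r0, r1, r2
   can be replaced by the flag-setting form
	adds	r0, r1, r2
   which has a 16-bit encoding, whenever the condition codes are dead at
   that point (register choice is illustrative).  */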
16744 static void
16745 thumb2_reorg (void)
16747 basic_block bb;
16748 regset_head live;
16750 INIT_REG_SET (&live);
16752 /* We are freeing block_for_insn in the toplev to keep compatibility
16753 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16754 compute_bb_for_insn ();
16755 df_analyze ();
16757 FOR_EACH_BB_FN (bb, cfun)
16759 rtx insn;
16761 COPY_REG_SET (&live, DF_LR_OUT (bb));
16762 df_simulate_initialize_backwards (bb, &live);
16763 FOR_BB_INSNS_REVERSE (bb, insn)
16765 if (NONJUMP_INSN_P (insn)
16766 && !REGNO_REG_SET_P (&live, CC_REGNUM)
16767 && GET_CODE (PATTERN (insn)) == SET)
16769 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
16770 rtx pat = PATTERN (insn);
16771 rtx dst = XEXP (pat, 0);
16772 rtx src = XEXP (pat, 1);
16773 rtx op0 = NULL_RTX, op1 = NULL_RTX;
16775 if (!OBJECT_P (src))
16776 op0 = XEXP (src, 0);
16778 if (BINARY_P (src))
16779 op1 = XEXP (src, 1);
16781 if (low_register_operand (dst, SImode))
16783 switch (GET_CODE (src))
16785 case PLUS:
16786 /* Adding two registers and storing the result
16787 in the first source is already a 16-bit
16788 operation. */
16789 if (rtx_equal_p (dst, op0)
16790 && register_operand (op1, SImode))
16791 break;
16793 if (low_register_operand (op0, SImode))
16795 /* ADDS <Rd>,<Rn>,<Rm> */
16796 if (low_register_operand (op1, SImode))
16797 action = CONV;
16798 /* ADDS <Rdn>,#<imm8> */
16799 /* SUBS <Rdn>,#<imm8> */
16800 else if (rtx_equal_p (dst, op0)
16801 && CONST_INT_P (op1)
16802 && IN_RANGE (INTVAL (op1), -255, 255))
16803 action = CONV;
16804 /* ADDS <Rd>,<Rn>,#<imm3> */
16805 /* SUBS <Rd>,<Rn>,#<imm3> */
16806 else if (CONST_INT_P (op1)
16807 && IN_RANGE (INTVAL (op1), -7, 7))
16808 action = CONV;
16810 /* ADCS <Rd>, <Rn> */
16811 else if (GET_CODE (XEXP (src, 0)) == PLUS
16812 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
16813 && low_register_operand (XEXP (XEXP (src, 0), 1),
16814 SImode)
16815 && COMPARISON_P (op1)
16816 && cc_register (XEXP (op1, 0), VOIDmode)
16817 && maybe_get_arm_condition_code (op1) == ARM_CS
16818 && XEXP (op1, 1) == const0_rtx)
16819 action = CONV;
16820 break;
16822 case MINUS:
16823 /* RSBS <Rd>,<Rn>,#0
16824 Not handled here: see NEG below. */
16825 /* SUBS <Rd>,<Rn>,#<imm3>
16826 SUBS <Rdn>,#<imm8>
16827 Not handled here: see PLUS above. */
16828 /* SUBS <Rd>,<Rn>,<Rm> */
16829 if (low_register_operand (op0, SImode)
16830 && low_register_operand (op1, SImode))
16831 action = CONV;
16832 break;
16834 case MULT:
16835 /* MULS <Rdm>,<Rn>,<Rdm>
16836 As an exception to the rule, this is only used
16837 when optimizing for size since MULS is slow on all
16838 known implementations. We do not even want to use
16839 MULS in cold code, if optimizing for speed, so we
16840 test the global flag here. */
16841 if (!optimize_size)
16842 break;
16843 /* else fall through. */
16844 case AND:
16845 case IOR:
16846 case XOR:
16847 /* ANDS <Rdn>,<Rm> */
16848 if (rtx_equal_p (dst, op0)
16849 && low_register_operand (op1, SImode))
16850 action = CONV;
16851 else if (rtx_equal_p (dst, op1)
16852 && low_register_operand (op0, SImode))
16853 action = SWAP_CONV;
16854 break;
16856 case ASHIFTRT:
16857 case ASHIFT:
16858 case LSHIFTRT:
16859 /* ASRS <Rdn>,<Rm> */
16860 /* LSRS <Rdn>,<Rm> */
16861 /* LSLS <Rdn>,<Rm> */
16862 if (rtx_equal_p (dst, op0)
16863 && low_register_operand (op1, SImode))
16864 action = CONV;
16865 /* ASRS <Rd>,<Rm>,#<imm5> */
16866 /* LSRS <Rd>,<Rm>,#<imm5> */
16867 /* LSLS <Rd>,<Rm>,#<imm5> */
16868 else if (low_register_operand (op0, SImode)
16869 && CONST_INT_P (op1)
16870 && IN_RANGE (INTVAL (op1), 0, 31))
16871 action = CONV;
16872 break;
16874 case ROTATERT:
16875 /* RORS <Rdn>,<Rm> */
16876 if (rtx_equal_p (dst, op0)
16877 && low_register_operand (op1, SImode))
16878 action = CONV;
16879 break;
16881 case NOT:
16882 case NEG:
16883 /* MVNS <Rd>,<Rm> */
16884 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16885 if (low_register_operand (op0, SImode))
16886 action = CONV;
16887 break;
16889 case CONST_INT:
16890 /* MOVS <Rd>,#<imm8> */
16891 if (CONST_INT_P (src)
16892 && IN_RANGE (INTVAL (src), 0, 255))
16893 action = CONV;
16894 break;
16896 case REG:
16897 /* MOVS and MOV<c> with registers have different
16898 encodings, so are not relevant here. */
16899 break;
16901 default:
16902 break;
16906 if (action != SKIP)
16908 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
16909 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
16910 rtvec vec;
16912 if (action == SWAP_CONV)
16914 src = copy_rtx (src);
16915 XEXP (src, 0) = op1;
16916 XEXP (src, 1) = op0;
16917 pat = gen_rtx_SET (VOIDmode, dst, src);
16918 vec = gen_rtvec (2, pat, clobber);
16920 else /* action == CONV */
16921 vec = gen_rtvec (2, pat, clobber);
16923 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
16924 INSN_CODE (insn) = -1;
16928 if (NONDEBUG_INSN_P (insn))
16929 df_simulate_one_insn_backwards (bb, insn, &live);
16933 CLEAR_REG_SET (&live);
16936 /* Gcc puts the pool in the wrong place for ARM, since we can only
16937 load addresses a limited distance around the pc. We do some
16938 special munging to move the constant pool values to the correct
16939 point in the code. */
16940 static void
16941 arm_reorg (void)
16943 rtx insn;
16944 HOST_WIDE_INT address = 0;
16945 Mfix * fix;
16947 if (TARGET_THUMB1)
16948 thumb1_reorg ();
16949 else if (TARGET_THUMB2)
16950 thumb2_reorg ();
16952 /* Ensure all insns that must be split have been split at this point.
16953 Otherwise, the pool placement code below may compute incorrect
16954 insn lengths. Note that when optimizing, all insns have already
16955 been split at this point. */
16956 if (!optimize)
16957 split_all_insns_noflow ();
16959 minipool_fix_head = minipool_fix_tail = NULL;
16961 /* The first insn must always be a note, or the code below won't
16962 scan it properly. */
16963 insn = get_insns ();
16964 gcc_assert (NOTE_P (insn));
16965 minipool_pad = 0;
16967 /* Scan all the insns and record the operands that will need fixing. */
16968 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
16970 if (BARRIER_P (insn))
16971 push_minipool_barrier (insn, address);
16972 else if (INSN_P (insn))
16974 rtx table;
16976 note_invalid_constants (insn, address, true);
16977 address += get_attr_length (insn);
16979 /* If the insn is a vector jump, add the size of the table
16980 and skip the table. */
16981 if (tablejump_p (insn, NULL, &table))
16983 address += get_jump_table_size (table);
16984 insn = table;
16987 else if (LABEL_P (insn))
16988 /* Add the worst-case padding due to alignment. We don't add
16989 the _current_ padding because the minipool insertions
16990 themselves might change it. */
16991 address += get_label_padding (insn);
16994 fix = minipool_fix_head;
16996 /* Now scan the fixups and perform the required changes. */
16997 while (fix)
16999 Mfix * ftmp;
17000 Mfix * fdel;
17001 Mfix * last_added_fix;
17002 Mfix * last_barrier = NULL;
17003 Mfix * this_fix;
17005 /* Skip any further barriers before the next fix. */
17006 while (fix && BARRIER_P (fix->insn))
17007 fix = fix->next;
17009 /* No more fixes. */
17010 if (fix == NULL)
17011 break;
17013 last_added_fix = NULL;
17015 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17017 if (BARRIER_P (ftmp->insn))
17019 if (ftmp->address >= minipool_vector_head->max_address)
17020 break;
17022 last_barrier = ftmp;
17024 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17025 break;
17027 last_added_fix = ftmp; /* Keep track of the last fix added. */
17030 /* If we found a barrier, drop back to that; any fixes that we
17031 could have reached but come after the barrier will now go in
17032 the next mini-pool. */
17033 if (last_barrier != NULL)
17035 /* Reduce the refcount for those fixes that won't go into this
17036 pool after all. */
17037 for (fdel = last_barrier->next;
17038 fdel && fdel != ftmp;
17039 fdel = fdel->next)
17041 fdel->minipool->refcount--;
17042 fdel->minipool = NULL;
17045 ftmp = last_barrier;
17047 else
17049 /* ftmp is the first fix that we can't fit into this pool and
17050 there are no natural barriers that we could use. Insert a
17051 new barrier in the code somewhere between the previous
17052 fix and this one, and arrange to jump around it. */
17053 HOST_WIDE_INT max_address;
17055 /* The last item on the list of fixes must be a barrier, so
17056 we can never run off the end of the list of fixes without
17057 last_barrier being set. */
17058 gcc_assert (ftmp);
17060 max_address = minipool_vector_head->max_address;
17061 /* Check that there isn't another fix that is in range that
17062 we couldn't fit into this pool because the pool was
17063 already too large: we need to put the pool before such an
17064 instruction. The pool itself may come just after the
17065 fix because create_fix_barrier also allows space for a
17066 jump instruction. */
17067 if (ftmp->address < max_address)
17068 max_address = ftmp->address + 1;
17070 last_barrier = create_fix_barrier (last_added_fix, max_address);
17073 assign_minipool_offsets (last_barrier);
17075 while (ftmp)
17077 if (!BARRIER_P (ftmp->insn)
17078 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17079 == NULL))
17080 break;
17082 ftmp = ftmp->next;
17085 /* Scan over the fixes we have identified for this pool, fixing them
17086 up and adding the constants to the pool itself. */
17087 for (this_fix = fix; this_fix && ftmp != this_fix;
17088 this_fix = this_fix->next)
17089 if (!BARRIER_P (this_fix->insn))
17091 rtx addr
17092 = plus_constant (Pmode,
17093 gen_rtx_LABEL_REF (VOIDmode,
17094 minipool_vector_label),
17095 this_fix->minipool->offset);
17096 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17099 dump_minipool (last_barrier->insn);
17100 fix = ftmp;
17103 /* From now on we must synthesize any constants that we can't handle
17104 directly. This can happen if the RTL gets split during final
17105 instruction generation. */
17106 after_arm_reorg = 1;
17108 /* Free the minipool memory. */
17109 obstack_free (&minipool_obstack, minipool_startobj);
17112 /* Routines to output assembly language. */
17114 /* If the rtx is the correct value then return the string of the number.
17115 In this way we can ensure that valid double constants are generated even
17116 when cross compiling. */
17117 const char *
17118 fp_immediate_constant (rtx x)
17120 REAL_VALUE_TYPE r;
17122 if (!fp_consts_inited)
17123 init_fp_table ();
17125 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17127 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17128 return "0";
17131 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17132 static const char *
17133 fp_const_from_val (REAL_VALUE_TYPE *r)
17135 if (!fp_consts_inited)
17136 init_fp_table ();
17138 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17139 return "0";
17142 /* OPERANDS[0] is the entire list of insns that constitute pop,
17143 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17144 is in the list, UPDATE is true iff the list contains an explicit
17145 update of the base register. */
17146 void
17147 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17148 bool update)
17150 int i;
17151 char pattern[100];
17152 int offset;
17153 const char *conditional;
17154 int num_saves = XVECLEN (operands[0], 0);
17155 unsigned int regno;
17156 unsigned int regno_base = REGNO (operands[1]);
17158 offset = 0;
17159 offset += update ? 1 : 0;
17160 offset += return_pc ? 1 : 0;
17162 /* Is the base register in the list? */
17163 for (i = offset; i < num_saves; i++)
17165 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17166 /* If SP is in the list, then the base register must be SP. */
17167 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17168 /* If base register is in the list, there must be no explicit update. */
17169 if (regno == regno_base)
17170 gcc_assert (!update);
17173 conditional = reverse ? "%?%D0" : "%?%d0";
17174 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17176 /* Output pop (not ldmfd) because it has a shorter encoding. */
17177 gcc_assert (update);
17178 sprintf (pattern, "pop%s\t{", conditional);
17180 else
17182 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17183 It's just a convention, their semantics are identical. */
17184 if (regno_base == SP_REGNUM)
17185 sprintf (pattern, "ldm%sfd\t", conditional);
17186 else if (TARGET_UNIFIED_ASM)
17187 sprintf (pattern, "ldmia%s\t", conditional);
17188 else
17189 sprintf (pattern, "ldm%sia\t", conditional);
17191 strcat (pattern, reg_names[regno_base]);
17192 if (update)
17193 strcat (pattern, "!, {");
17194 else
17195 strcat (pattern, ", {");
17198 /* Output the first destination register. */
17199 strcat (pattern,
17200 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17202 /* Output the rest of the destination registers. */
17203 for (i = offset + 1; i < num_saves; i++)
17205 strcat (pattern, ", ");
17206 strcat (pattern,
17207 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17210 strcat (pattern, "}");
17212 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17213 strcat (pattern, "^");
17215 output_asm_insn (pattern, &cond);
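/* The resulting assembly is typically of the form (illustrative)
	pop	{r4, r5, pc}
   when the base register is SP with writeback and unified syntax is in
   use, or
	ldmfd	sp!, {r4, r5, pc}
   under divided syntax, with a trailing "^" appended when returning from
   an interrupt handler.  */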
17219 /* Output the assembly for a store multiple. */
17221 const char *
17222 vfp_output_fstmd (rtx * operands)
17224 char pattern[100];
17225 int p;
17226 int base;
17227 int i;
17229 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17230 p = strlen (pattern);
17232 gcc_assert (REG_P (operands[1]));
17234 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17235 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17237 p += sprintf (&pattern[p], ", d%d", base + i);
17239 strcpy (&pattern[p], "}");
17241 output_asm_insn (pattern, operands);
17242 return "";
17246 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17247 number of bytes pushed. */
17249 static int
17250 vfp_emit_fstmd (int base_reg, int count)
17252 rtx par;
17253 rtx dwarf;
17254 rtx tmp, reg;
17255 int i;
17257 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17258 register pairs are stored by a store multiple insn. We avoid this
17259 by pushing an extra pair. */
17260 if (count == 2 && !arm_arch6)
17262 if (base_reg == LAST_VFP_REGNUM - 3)
17263 base_reg -= 2;
17264 count++;
17267 /* FSTMD may not store more than 16 doubleword registers at once. Split
17268 larger stores into multiple parts (up to a maximum of two, in
17269 practice). */
17270 if (count > 16)
17272 int saved;
17273 /* NOTE: base_reg is an internal register number, so each D register
17274 counts as 2. */
17275 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17276 saved += vfp_emit_fstmd (base_reg, 16);
17277 return saved;
17280 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17281 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17283 reg = gen_rtx_REG (DFmode, base_reg);
17284 base_reg += 2;
17286 XVECEXP (par, 0, 0)
17287 = gen_rtx_SET (VOIDmode,
17288 gen_frame_mem
17289 (BLKmode,
17290 gen_rtx_PRE_MODIFY (Pmode,
17291 stack_pointer_rtx,
17292 plus_constant
17293 (Pmode, stack_pointer_rtx,
17294 - (count * 8)))
17296 gen_rtx_UNSPEC (BLKmode,
17297 gen_rtvec (1, reg),
17298 UNSPEC_PUSH_MULT));
17300 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17301 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17302 RTX_FRAME_RELATED_P (tmp) = 1;
17303 XVECEXP (dwarf, 0, 0) = tmp;
17305 tmp = gen_rtx_SET (VOIDmode,
17306 gen_frame_mem (DFmode, stack_pointer_rtx),
17307 reg);
17308 RTX_FRAME_RELATED_P (tmp) = 1;
17309 XVECEXP (dwarf, 0, 1) = tmp;
17311 for (i = 1; i < count; i++)
17313 reg = gen_rtx_REG (DFmode, base_reg);
17314 base_reg += 2;
17315 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17317 tmp = gen_rtx_SET (VOIDmode,
17318 gen_frame_mem (DFmode,
17319 plus_constant (Pmode,
17320 stack_pointer_rtx,
17321 i * 8)),
17322 reg);
17323 RTX_FRAME_RELATED_P (tmp) = 1;
17324 XVECEXP (dwarf, 0, i + 1) = tmp;
17327 par = emit_insn (par);
17328 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17329 RTX_FRAME_RELATED_P (par) = 1;
17331 return count * 8;
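/* Illustrative numbers: pushing 20 D registers is split into one FSTMD of
   the 4 highest-numbered registers followed by one FSTMD of the remaining
   16, and the function reports 20 * 8 = 160 bytes pushed in total.  */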
17334 /* Emit a call instruction with pattern PAT. ADDR is the address of
17335 the call target. */
17337 void
17338 arm_emit_call_insn (rtx pat, rtx addr)
17340 rtx insn;
17342 insn = emit_call_insn (pat);
17344 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17345 If the call might use such an entry, add a use of the PIC register
17346 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17347 if (TARGET_VXWORKS_RTP
17348 && flag_pic
17349 && GET_CODE (addr) == SYMBOL_REF
17350 && (SYMBOL_REF_DECL (addr)
17351 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17352 : !SYMBOL_REF_LOCAL_P (addr)))
17354 require_pic_register ();
17355 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17359 /* Output a 'call' insn. */
17360 const char *
17361 output_call (rtx *operands)
17363 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17365 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17366 if (REGNO (operands[0]) == LR_REGNUM)
17368 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17369 output_asm_insn ("mov%?\t%0, %|lr", operands);
17372 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17374 if (TARGET_INTERWORK || arm_arch4t)
17375 output_asm_insn ("bx%?\t%0", operands);
17376 else
17377 output_asm_insn ("mov%?\t%|pc, %0", operands);
17379 return "";
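/* On these pre-ARMv5 targets the expansion is typically (illustrative)
	mov	lr, pc
	bx	r0
   with the bx replaced by "mov pc, r0" when neither interworking nor
   ARMv4T is available.  */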
17382 /* Output a 'call' insn that is a reference in memory. This is
17383 disabled for ARMv5 and we prefer a blx instead because otherwise
17384 there's a significant performance overhead. */
17385 const char *
17386 output_call_mem (rtx *operands)
17388 gcc_assert (!arm_arch5);
17389 if (TARGET_INTERWORK)
17391 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17392 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17393 output_asm_insn ("bx%?\t%|ip", operands);
17395 else if (regno_use_in (LR_REGNUM, operands[0]))
17397 /* LR is used in the memory address. We load the address in the
17398 first instruction. It's safe to use IP as the target of the
17399 load since the call will kill it anyway. */
17400 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17401 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17402 if (arm_arch4t)
17403 output_asm_insn ("bx%?\t%|ip", operands);
17404 else
17405 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17407 else
17409 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17410 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17413 return "";
17417 /* Output a move from arm registers to arm registers of a long double
17418 OPERANDS[0] is the destination.
17419 OPERANDS[1] is the source. */
17420 const char *
17421 output_mov_long_double_arm_from_arm (rtx *operands)
17423 /* We have to be careful here because the two might overlap. */
17424 int dest_start = REGNO (operands[0]);
17425 int src_start = REGNO (operands[1]);
17426 rtx ops[2];
17427 int i;
17429 if (dest_start < src_start)
17431 for (i = 0; i < 3; i++)
17433 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17434 ops[1] = gen_rtx_REG (SImode, src_start + i);
17435 output_asm_insn ("mov%?\t%0, %1", ops);
17438 else
17440 for (i = 2; i >= 0; i--)
17442 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17443 ops[1] = gen_rtx_REG (SImode, src_start + i);
17444 output_asm_insn ("mov%?\t%0, %1", ops);
17448 return "";
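/* Illustrative case: copying registers {r2,r3,r4} into {r1,r2,r3} must
   proceed low-to-high (r1 <- r2, r2 <- r3, r3 <- r4), whereas copying
   {r1,r2,r3} into {r2,r3,r4} must proceed high-to-low, so that no source
   register is overwritten before it has been read.  */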
17451 void
17452 arm_emit_movpair (rtx dest, rtx src)
17454 /* If the src is an immediate, simplify it. */
17455 if (CONST_INT_P (src))
17457 HOST_WIDE_INT val = INTVAL (src);
17458 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17459 if ((val >> 16) & 0x0000ffff)
17460 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17461 GEN_INT (16)),
17462 GEN_INT ((val >> 16) & 0x0000ffff));
17463 return;
17465 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17466 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17469 /* Output a move between double words. It must be REG<-MEM
17470 or MEM<-REG. */
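/* For illustration (target-dependent): a DImode load of r0/r1 from [r2]
   will normally be emitted as "ldrd r0, [r2]" when TARGET_LDRD, or as
   "ldmia r2, {r0-r1}" otherwise; stores are handled symmetrically.  */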
17471 const char *
17472 output_move_double (rtx *operands, bool emit, int *count)
17474 enum rtx_code code0 = GET_CODE (operands[0]);
17475 enum rtx_code code1 = GET_CODE (operands[1]);
17476 rtx otherops[3];
17477 if (count)
17478 *count = 1;
17480 /* The only case when this might happen is when
17481 you are looking at the length of a DImode instruction
17482 that has an invalid constant in it. */
17483 if (code0 == REG && code1 != MEM)
17485 gcc_assert (!emit);
17486 *count = 2;
17487 return "";
17490 if (code0 == REG)
17492 unsigned int reg0 = REGNO (operands[0]);
17494 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17496 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17498 switch (GET_CODE (XEXP (operands[1], 0)))
17500 case REG:
17502 if (emit)
17504 if (TARGET_LDRD
17505 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17506 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17507 else
17508 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17510 break;
17512 case PRE_INC:
17513 gcc_assert (TARGET_LDRD);
17514 if (emit)
17515 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17516 break;
17518 case PRE_DEC:
17519 if (emit)
17521 if (TARGET_LDRD)
17522 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17523 else
17524 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17526 break;
17528 case POST_INC:
17529 if (emit)
17531 if (TARGET_LDRD)
17532 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17533 else
17534 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17536 break;
17538 case POST_DEC:
17539 gcc_assert (TARGET_LDRD);
17540 if (emit)
17541 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17542 break;
17544 case PRE_MODIFY:
17545 case POST_MODIFY:
17546 /* Autoincrement addressing modes should never have overlapping
17547 base and destination registers, and overlapping index registers
17548 are already prohibited, so this doesn't need to worry about
17549 fix_cm3_ldrd. */
17550 otherops[0] = operands[0];
17551 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17552 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17554 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17556 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17558 /* Registers overlap so split out the increment. */
17559 if (emit)
17561 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17562 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17564 if (count)
17565 *count = 2;
17567 else
17569 /* Use a single insn if we can.
17570 FIXME: IWMMXT allows offsets larger than ldrd can
17571 handle; fix these up with a pair of ldr. */
17572 if (TARGET_THUMB2
17573 || !CONST_INT_P (otherops[2])
17574 || (INTVAL (otherops[2]) > -256
17575 && INTVAL (otherops[2]) < 256))
17577 if (emit)
17578 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17580 else
17582 if (emit)
17584 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17585 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17587 if (count)
17588 *count = 2;
17593 else
17595 /* Use a single insn if we can.
17596 FIXME: IWMMXT allows offsets larger than ldrd can handle;
17597 fix these up with a pair of ldr. */
17598 if (TARGET_THUMB2
17599 || !CONST_INT_P (otherops[2])
17600 || (INTVAL (otherops[2]) > -256
17601 && INTVAL (otherops[2]) < 256))
17603 if (emit)
17604 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17606 else
17608 if (emit)
17610 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17611 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17613 if (count)
17614 *count = 2;
17617 break;
17619 case LABEL_REF:
17620 case CONST:
17621 /* We might be able to use ldrd %0, %1 here. However, the range is
17622 different from that of ldr/adr, and it is broken on some ARMv7-M
17623 implementations. */
17624 /* Use the second register of the pair to avoid problematic
17625 overlap. */
17626 otherops[1] = operands[1];
17627 if (emit)
17628 output_asm_insn ("adr%?\t%0, %1", otherops);
17629 operands[1] = otherops[0];
17630 if (emit)
17632 if (TARGET_LDRD)
17633 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17634 else
17635 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17638 if (count)
17639 *count = 2;
17640 break;
17642 /* ??? This needs checking for thumb2. */
17643 default:
17644 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17645 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17647 otherops[0] = operands[0];
17648 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17649 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17651 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17653 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17655 switch ((int) INTVAL (otherops[2]))
17657 case -8:
17658 if (emit)
17659 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17660 return "";
17661 case -4:
17662 if (TARGET_THUMB2)
17663 break;
17664 if (emit)
17665 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17666 return "";
17667 case 4:
17668 if (TARGET_THUMB2)
17669 break;
17670 if (emit)
17671 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17672 return "";
17675 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17676 operands[1] = otherops[0];
17677 if (TARGET_LDRD
17678 && (REG_P (otherops[2])
17679 || TARGET_THUMB2
17680 || (CONST_INT_P (otherops[2])
17681 && INTVAL (otherops[2]) > -256
17682 && INTVAL (otherops[2]) < 256)))
17684 if (reg_overlap_mentioned_p (operands[0],
17685 otherops[2]))
17687 rtx tmp;
17688 /* Swap base and index registers over to
17689 avoid a conflict. */
17690 tmp = otherops[1];
17691 otherops[1] = otherops[2];
17692 otherops[2] = tmp;
17694 /* If both registers conflict, it will usually
17695 have been fixed by a splitter. */
17696 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17697 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17699 if (emit)
17701 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17702 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17704 if (count)
17705 *count = 2;
17707 else
17709 otherops[0] = operands[0];
17710 if (emit)
17711 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17713 return "";
17716 if (CONST_INT_P (otherops[2]))
17718 if (emit)
17720 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17721 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17722 else
17723 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17726 else
17728 if (emit)
17729 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17732 else
17734 if (emit)
17735 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
17738 if (count)
17739 *count = 2;
17741 if (TARGET_LDRD)
17742 return "ldr%(d%)\t%0, [%1]";
17744 return "ldm%(ia%)\t%1, %M0";
17746 else
17748 otherops[1] = adjust_address (operands[1], SImode, 4);
17749 /* Take care of overlapping base/data reg. */
17750 if (reg_mentioned_p (operands[0], operands[1]))
17752 if (emit)
17754 output_asm_insn ("ldr%?\t%0, %1", otherops);
17755 output_asm_insn ("ldr%?\t%0, %1", operands);
17757 if (count)
17758 *count = 2;
17761 else
17763 if (emit)
17765 output_asm_insn ("ldr%?\t%0, %1", operands);
17766 output_asm_insn ("ldr%?\t%0, %1", otherops);
17768 if (count)
17769 *count = 2;
17774 else
17776 /* Constraints should ensure this. */
17777 gcc_assert (code0 == MEM && code1 == REG);
17778 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
17779 || (TARGET_ARM && TARGET_LDRD));
17781 switch (GET_CODE (XEXP (operands[0], 0)))
17783 case REG:
17784 if (emit)
17786 if (TARGET_LDRD)
17787 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
17788 else
17789 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17791 break;
17793 case PRE_INC:
17794 gcc_assert (TARGET_LDRD);
17795 if (emit)
17796 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
17797 break;
17799 case PRE_DEC:
17800 if (emit)
17802 if (TARGET_LDRD)
17803 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
17804 else
17805 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
17807 break;
17809 case POST_INC:
17810 if (emit)
17812 if (TARGET_LDRD)
17813 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
17814 else
17815 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
17817 break;
17819 case POST_DEC:
17820 gcc_assert (TARGET_LDRD);
17821 if (emit)
17822 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
17823 break;
17825 case PRE_MODIFY:
17826 case POST_MODIFY:
17827 otherops[0] = operands[1];
17828 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
17829 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
17831 /* IWMMXT allows offsets larger than strd can handle;
17832 fix these up with a pair of str. */
17833 if (!TARGET_THUMB2
17834 && CONST_INT_P (otherops[2])
17835 && (INTVAL(otherops[2]) <= -256
17836 || INTVAL(otherops[2]) >= 256))
17838 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17840 if (emit)
17842 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
17843 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17845 if (count)
17846 *count = 2;
17848 else
17850 if (emit)
17852 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17853 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
17855 if (count)
17856 *count = 2;
17859 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17861 if (emit)
17862 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
17864 else
17866 if (emit)
17867 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
17869 break;
17871 case PLUS:
17872 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
17873 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17875 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
17877 case -8:
17878 if (emit)
17879 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
17880 return "";
17882 case -4:
17883 if (TARGET_THUMB2)
17884 break;
17885 if (emit)
17886 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
17887 return "";
17889 case 4:
17890 if (TARGET_THUMB2)
17891 break;
17892 if (emit)
17893 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
17894 return "";
17897 if (TARGET_LDRD
17898 && (REG_P (otherops[2])
17899 || TARGET_THUMB2
17900 || (CONST_INT_P (otherops[2])
17901 && INTVAL (otherops[2]) > -256
17902 && INTVAL (otherops[2]) < 256)))
17904 otherops[0] = operands[1];
17905 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
17906 if (emit)
17907 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
17908 return "";
17910 /* Fall through */
17912 default:
17913 otherops[0] = adjust_address (operands[0], SImode, 4);
17914 otherops[1] = operands[1];
17915 if (emit)
17917 output_asm_insn ("str%?\t%1, %0", operands);
17918 output_asm_insn ("str%?\t%H1, %0", otherops);
17920 if (count)
17921 *count = 2;
17925 return "";
17928 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17929 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
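/* For illustration: a quad-word load from [r4] into r0-r3 is emitted as
   something like "ldmia r4, {r0-r3}"; the exact register-list syntax comes
   from the %M operand modifier.  */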
17931 const char *
17932 output_move_quad (rtx *operands)
17934 if (REG_P (operands[0]))
17936 /* Load, or reg->reg move. */
17938 if (MEM_P (operands[1]))
17940 switch (GET_CODE (XEXP (operands[1], 0)))
17942 case REG:
17943 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17944 break;
17946 case LABEL_REF:
17947 case CONST:
17948 output_asm_insn ("adr%?\t%0, %1", operands);
17949 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
17950 break;
17952 default:
17953 gcc_unreachable ();
17956 else
17958 rtx ops[2];
17959 int dest, src, i;
17961 gcc_assert (REG_P (operands[1]));
17963 dest = REGNO (operands[0]);
17964 src = REGNO (operands[1]);
17966 /* This seems pretty dumb, but hopefully GCC won't try to do it
17967 very often. */
17968 if (dest < src)
17969 for (i = 0; i < 4; i++)
17971 ops[0] = gen_rtx_REG (SImode, dest + i);
17972 ops[1] = gen_rtx_REG (SImode, src + i);
17973 output_asm_insn ("mov%?\t%0, %1", ops);
17975 else
17976 for (i = 3; i >= 0; i--)
17978 ops[0] = gen_rtx_REG (SImode, dest + i);
17979 ops[1] = gen_rtx_REG (SImode, src + i);
17980 output_asm_insn ("mov%?\t%0, %1", ops);
17984 else
17986 gcc_assert (MEM_P (operands[0]));
17987 gcc_assert (REG_P (operands[1]));
17988 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
17990 switch (GET_CODE (XEXP (operands[0], 0)))
17992 case REG:
17993 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17994 break;
17996 default:
17997 gcc_unreachable ();
18001 return "";
18004 /* Output a VFP load or store instruction. */
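/* For illustration (pre-UAL mnemonics): a DFmode load from [r0, #8] into
   d0 is emitted as "fldd d0, [r0, #8]", while a DFmode store to a
   pre-decremented stack address becomes "fstmdbd sp!, {d8}"; the templates
   below build the mnemonic from the mode size and the addressing form.  */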
18006 const char *
18007 output_move_vfp (rtx *operands)
18009 rtx reg, mem, addr, ops[2];
18010 int load = REG_P (operands[0]);
18011 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18012 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18013 const char *templ;
18014 char buff[50];
18015 enum machine_mode mode;
18017 reg = operands[!load];
18018 mem = operands[load];
18020 mode = GET_MODE (reg);
18022 gcc_assert (REG_P (reg));
18023 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18024 gcc_assert (mode == SFmode
18025 || mode == DFmode
18026 || mode == SImode
18027 || mode == DImode
18028 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18029 gcc_assert (MEM_P (mem));
18031 addr = XEXP (mem, 0);
18033 switch (GET_CODE (addr))
18035 case PRE_DEC:
18036 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18037 ops[0] = XEXP (addr, 0);
18038 ops[1] = reg;
18039 break;
18041 case POST_INC:
18042 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18043 ops[0] = XEXP (addr, 0);
18044 ops[1] = reg;
18045 break;
18047 default:
18048 templ = "f%s%c%%?\t%%%s0, %%1%s";
18049 ops[0] = reg;
18050 ops[1] = mem;
18051 break;
18054 sprintf (buff, templ,
18055 load ? "ld" : "st",
18056 dp ? 'd' : 's',
18057 dp ? "P" : "",
18058 integer_p ? "\t%@ int" : "");
18059 output_asm_insn (buff, ops);
18061 return "";
18064 /* Output a Neon double-word or quad-word load or store, or a load
18065 or store for larger structure modes.
18067 WARNING: The ordering of elements is weird in big-endian mode,
18068 because the EABI requires that vectors stored in memory appear
18069 as though they were stored by a VSTM instruction.
18070 GCC RTL defines element ordering based on in-memory order.
18071 This can be different from the architectural ordering of elements
18072 within a NEON register. The intrinsics defined in arm_neon.h use the
18073 NEON register element ordering, not the GCC RTL element ordering.
18075 For example, the in-memory ordering of a big-endian quadword
18076 vector with 16-bit elements when stored from register pair {d0,d1}
18077 will be (lowest address first, d0[N] is NEON register element N):
18079 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18081 When necessary, quadword registers (dN, dN+1) are moved to ARM
18082 registers from rN in the order:
18084 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18086 So that STM/LDM can be used on vectors in ARM registers, and the
18087 same memory layout will result as if VSTM/VLDM were used.
18089 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18090 possible, which allows use of appropriate alignment tags.
18091 Note that the choice of "64" is independent of the actual vector
18092 element size; this size simply ensures that the behavior is
18093 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18095 Due to limitations of those instructions, use of VST1.64/VLD1.64
18096 is not possible if:
18097 - the address contains PRE_DEC, or
18098 - the mode refers to more than 4 double-word registers
18100 In those cases, it would be possible to replace VSTM/VLDM by a
18101 sequence of instructions; this is not currently implemented since
18102 this is not certain to actually improve performance. */
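/* For illustration: a quad-word load from [r0] is normally emitted as
   something like "vld1.64 {d0, d1}, [r0]" (possibly with an alignment
   hint), whereas an XImode access, which needs more than four double-word
   registers, falls back to "vldmia r0, {d0-d7}".  */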
18104 const char *
18105 output_move_neon (rtx *operands)
18107 rtx reg, mem, addr, ops[2];
18108 int regno, nregs, load = REG_P (operands[0]);
18109 const char *templ;
18110 char buff[50];
18111 enum machine_mode mode;
18113 reg = operands[!load];
18114 mem = operands[load];
18116 mode = GET_MODE (reg);
18118 gcc_assert (REG_P (reg));
18119 regno = REGNO (reg);
18120 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18121 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18122 || NEON_REGNO_OK_FOR_QUAD (regno));
18123 gcc_assert (VALID_NEON_DREG_MODE (mode)
18124 || VALID_NEON_QREG_MODE (mode)
18125 || VALID_NEON_STRUCT_MODE (mode));
18126 gcc_assert (MEM_P (mem));
18128 addr = XEXP (mem, 0);
18130 /* Strip off const from addresses like (const (plus (...))). */
18131 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18132 addr = XEXP (addr, 0);
18134 switch (GET_CODE (addr))
18136 case POST_INC:
18137 /* We have to use vldm / vstm for too-large modes. */
18138 if (nregs > 4)
18140 templ = "v%smia%%?\t%%0!, %%h1";
18141 ops[0] = XEXP (addr, 0);
18143 else
18145 templ = "v%s1.64\t%%h1, %%A0";
18146 ops[0] = mem;
18148 ops[1] = reg;
18149 break;
18151 case PRE_DEC:
18152 /* We have to use vldm / vstm in this case, since there is no
18153 pre-decrement form of the vld1 / vst1 instructions. */
18154 templ = "v%smdb%%?\t%%0!, %%h1";
18155 ops[0] = XEXP (addr, 0);
18156 ops[1] = reg;
18157 break;
18159 case POST_MODIFY:
18160 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18161 gcc_unreachable ();
18163 case LABEL_REF:
18164 case PLUS:
18166 int i;
18167 int overlap = -1;
18168 for (i = 0; i < nregs; i++)
18170 /* We're only using DImode here because it's a convenient size. */
18171 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18172 ops[1] = adjust_address (mem, DImode, 8 * i);
18173 if (reg_overlap_mentioned_p (ops[0], mem))
18175 gcc_assert (overlap == -1);
18176 overlap = i;
18178 else
18180 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18181 output_asm_insn (buff, ops);
18184 if (overlap != -1)
18186 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18187 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18188 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18189 output_asm_insn (buff, ops);
18192 return "";
18195 default:
18196 /* We have to use vldm / vstm for too-large modes. */
18197 if (nregs > 4)
18198 templ = "v%smia%%?\t%%m0, %%h1";
18199 else
18200 templ = "v%s1.64\t%%h1, %%A0";
18202 ops[0] = mem;
18203 ops[1] = reg;
18206 sprintf (buff, templ, load ? "ld" : "st");
18207 output_asm_insn (buff, ops);
18209 return "";
18212 /* Compute and return the length of neon_mov<mode>, where <mode> is
18213 one of VSTRUCT modes: EI, OI, CI or XI. */
18214 int
18215 arm_attr_length_move_neon (rtx insn)
18217 rtx reg, mem, addr;
18218 int load;
18219 enum machine_mode mode;
18221 extract_insn_cached (insn);
18223 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18225 mode = GET_MODE (recog_data.operand[0]);
18226 switch (mode)
18228 case EImode:
18229 case OImode:
18230 return 8;
18231 case CImode:
18232 return 12;
18233 case XImode:
18234 return 16;
18235 default:
18236 gcc_unreachable ();
18240 load = REG_P (recog_data.operand[0]);
18241 reg = recog_data.operand[!load];
18242 mem = recog_data.operand[load];
18244 gcc_assert (MEM_P (mem));
18246 mode = GET_MODE (reg);
18247 addr = XEXP (mem, 0);
18249 /* Strip off const from addresses like (const (plus (...))). */
18250 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18251 addr = XEXP (addr, 0);
18253 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18255 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18256 return insns * 4;
18258 else
18259 return 4;
18262 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18263 return zero. */
18265 int
18266 arm_address_offset_is_imm (rtx insn)
18268 rtx mem, addr;
18270 extract_insn_cached (insn);
18272 if (REG_P (recog_data.operand[0]))
18273 return 0;
18275 mem = recog_data.operand[0];
18277 gcc_assert (MEM_P (mem));
18279 addr = XEXP (mem, 0);
18281 if (REG_P (addr)
18282 || (GET_CODE (addr) == PLUS
18283 && REG_P (XEXP (addr, 0))
18284 && CONST_INT_P (XEXP (addr, 1))))
18285 return 1;
18286 else
18287 return 0;
18290 /* Output an ADD r, s, #n where n may be too big for one instruction.
18291 If adding zero to one register, output nothing. */
18292 const char *
18293 output_add_immediate (rtx *operands)
18295 HOST_WIDE_INT n = INTVAL (operands[2]);
18297 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18299 if (n < 0)
18300 output_multi_immediate (operands,
18301 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18302 -n);
18303 else
18304 output_multi_immediate (operands,
18305 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18309 return "";
18312 /* Output a multiple immediate operation.
18313 OPERANDS is the vector of operands referred to in the output patterns.
18314 INSTR1 is the output pattern to use for the first constant.
18315 INSTR2 is the output pattern to use for subsequent constants.
18316 IMMED_OP is the index of the constant slot in OPERANDS.
18317 N is the constant value. */
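/* For example, with the "add" patterns passed in by output_add_immediate,
   a constant of 0x10004 is split into two valid immediates and emitted as
   "add r0, r1, #4" followed by "add r0, r0, #65536".  */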
18318 static const char *
18319 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18320 int immed_op, HOST_WIDE_INT n)
18322 #if HOST_BITS_PER_WIDE_INT > 32
18323 n &= 0xffffffff;
18324 #endif
18326 if (n == 0)
18328 /* Quick and easy output. */
18329 operands[immed_op] = const0_rtx;
18330 output_asm_insn (instr1, operands);
18332 else
18334 int i;
18335 const char * instr = instr1;
18337 /* Note that n is never zero here (which would give no output). */
18338 for (i = 0; i < 32; i += 2)
18340 if (n & (3 << i))
18342 operands[immed_op] = GEN_INT (n & (255 << i));
18343 output_asm_insn (instr, operands);
18344 instr = instr2;
18345 i += 6;
18350 return "";
18353 /* Return the name of a shifter operation. */
18354 static const char *
18355 arm_shift_nmem(enum rtx_code code)
18357 switch (code)
18359 case ASHIFT:
18360 return ARM_LSL_NAME;
18362 case ASHIFTRT:
18363 return "asr";
18365 case LSHIFTRT:
18366 return "lsr";
18368 case ROTATERT:
18369 return "ror";
18371 default:
18372 abort();
18376 /* Return the appropriate ARM instruction for the operation code.
18377 The returned result should not be overwritten. OP is the rtx of the
18378 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18379 was shifted. */
18380 const char *
18381 arithmetic_instr (rtx op, int shift_first_arg)
18383 switch (GET_CODE (op))
18385 case PLUS:
18386 return "add";
18388 case MINUS:
18389 return shift_first_arg ? "rsb" : "sub";
18391 case IOR:
18392 return "orr";
18394 case XOR:
18395 return "eor";
18397 case AND:
18398 return "and";
18400 case ASHIFT:
18401 case ASHIFTRT:
18402 case LSHIFTRT:
18403 case ROTATERT:
18404 return arm_shift_nmem(GET_CODE(op));
18406 default:
18407 gcc_unreachable ();
18411 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18412 for the operation code. The returned result should not be overwritten.
18413 OP is the rtx code of the shift.
18414 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18415 constant shift amount otherwise. */
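/* For example, (ashift (reg) (const_int 3)) yields ARM_LSL_NAME ("lsl" or
   "asl" depending on assembler syntax) with *AMOUNTP set to 3, and
   (mult (reg) (const_int 8)) is rendered the same way, while
   (rotate (reg) (const_int 10)) becomes "ror" with *AMOUNTP set to 22.  */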
18416 static const char *
18417 shift_op (rtx op, HOST_WIDE_INT *amountp)
18419 const char * mnem;
18420 enum rtx_code code = GET_CODE (op);
18422 switch (code)
18424 case ROTATE:
18425 if (!CONST_INT_P (XEXP (op, 1)))
18427 output_operand_lossage ("invalid shift operand");
18428 return NULL;
18431 code = ROTATERT;
18432 *amountp = 32 - INTVAL (XEXP (op, 1));
18433 mnem = "ror";
18434 break;
18436 case ASHIFT:
18437 case ASHIFTRT:
18438 case LSHIFTRT:
18439 case ROTATERT:
18440 mnem = arm_shift_nmem(code);
18441 if (CONST_INT_P (XEXP (op, 1)))
18443 *amountp = INTVAL (XEXP (op, 1));
18445 else if (REG_P (XEXP (op, 1)))
18447 *amountp = -1;
18448 return mnem;
18450 else
18452 output_operand_lossage ("invalid shift operand");
18453 return NULL;
18455 break;
18457 case MULT:
18458 /* We never have to worry about the amount being other than a
18459 power of 2, since this case can never be reloaded from a reg. */
18460 if (!CONST_INT_P (XEXP (op, 1)))
18462 output_operand_lossage ("invalid shift operand");
18463 return NULL;
18466 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18468 /* Amount must be a power of two. */
18469 if (*amountp & (*amountp - 1))
18471 output_operand_lossage ("invalid shift operand");
18472 return NULL;
18475 *amountp = int_log2 (*amountp);
18476 return ARM_LSL_NAME;
18478 default:
18479 output_operand_lossage ("invalid shift operand");
18480 return NULL;
18483 /* This is not 100% correct, but follows from the desire to merge
18484 multiplication by a power of 2 with the recognizer for a
18485 shift. >=32 is not a valid shift for "lsl", so we must try to
18486 output a shift that produces the correct arithmetical result.
18487 Using lsr #32 is identical except for the fact that the carry bit
18488 is not set correctly if we set the flags; but we never use the
18489 carry bit from such an operation, so we can ignore that. */
18490 if (code == ROTATERT)
18491 /* Rotate is just modulo 32. */
18492 *amountp &= 31;
18493 else if (*amountp != (*amountp & 31))
18495 if (code == ASHIFT)
18496 mnem = "lsr";
18497 *amountp = 32;
18500 /* Shifts of 0 are no-ops. */
18501 if (*amountp == 0)
18502 return NULL;
18504 return mnem;
18507 /* Obtain the shift from the POWER of two. */
18509 static HOST_WIDE_INT
18510 int_log2 (HOST_WIDE_INT power)
18512 HOST_WIDE_INT shift = 0;
18514 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18516 gcc_assert (shift <= 31);
18517 shift++;
18520 return shift;
18523 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18524 because /bin/as is horribly restrictive. The judgement about
18525 whether or not each character is 'printable' (and can be output as
18526 is) or not (and must be printed with an octal escape) must be made
18527 with reference to the *host* character set -- the situation is
18528 similar to that discussed in the comments above pp_c_char in
18529 c-pretty-print.c. */
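/* For example, the four bytes 'H', 'i', '\n', '\0' are emitted as

	.ascii	"Hi\012\000"

   with non-printable characters escaped in octal.  */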
18531 #define MAX_ASCII_LEN 51
18533 void
18534 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18536 int i;
18537 int len_so_far = 0;
18539 fputs ("\t.ascii\t\"", stream);
18541 for (i = 0; i < len; i++)
18543 int c = p[i];
18545 if (len_so_far >= MAX_ASCII_LEN)
18547 fputs ("\"\n\t.ascii\t\"", stream);
18548 len_so_far = 0;
18551 if (ISPRINT (c))
18553 if (c == '\\' || c == '\"')
18555 putc ('\\', stream);
18556 len_so_far++;
18558 putc (c, stream);
18559 len_so_far++;
18561 else
18563 fprintf (stream, "\\%03o", c);
18564 len_so_far += 4;
18568 fputs ("\"\n", stream);
18571 /* Compute the register save mask for registers 0 through 12
18572 inclusive. This code is used by arm_compute_save_reg_mask. */
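/* For illustration: a normal function that uses the call-saved registers
   r4 and r7 and needs a frame pointer would get a mask of
   (1 << 4) | (1 << 7) | (1 << HARD_FRAME_POINTER_REGNUM), i.e. r11 in ARM
   mode; interrupt and noreturn functions follow the special rules below.  */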
18574 static unsigned long
18575 arm_compute_save_reg0_reg12_mask (void)
18577 unsigned long func_type = arm_current_func_type ();
18578 unsigned long save_reg_mask = 0;
18579 unsigned int reg;
18581 if (IS_INTERRUPT (func_type))
18583 unsigned int max_reg;
18584 /* Interrupt functions must not corrupt any registers,
18585 even call clobbered ones. If this is a leaf function
18586 we can just examine the registers used by the RTL, but
18587 otherwise we have to assume that whatever function is
18588 called might clobber anything, and so we have to save
18589 all the call-clobbered registers as well. */
18590 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18591 /* FIQ handlers have registers r8 - r12 banked, so
18592 we only need to check r0 - r7. Normal ISRs only
18593 bank r14 and r15, so we must check up to r12.
18594 r13 is the stack pointer which is always preserved,
18595 so we do not need to consider it here. */
18596 max_reg = 7;
18597 else
18598 max_reg = 12;
18600 for (reg = 0; reg <= max_reg; reg++)
18601 if (df_regs_ever_live_p (reg)
18602 || (! crtl->is_leaf && call_used_regs[reg]))
18603 save_reg_mask |= (1 << reg);
18605 /* Also save the pic base register if necessary. */
18606 if (flag_pic
18607 && !TARGET_SINGLE_PIC_BASE
18608 && arm_pic_register != INVALID_REGNUM
18609 && crtl->uses_pic_offset_table)
18610 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18612 else if (IS_VOLATILE(func_type))
18614 /* For noreturn functions we historically omitted register saves
18615 altogether. However, this really messes up debugging. As a
18616 compromise save just the frame pointers. Combined with the link
18617 register saved elsewhere this should be sufficient to get
18618 a backtrace. */
18619 if (frame_pointer_needed)
18620 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18621 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18622 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18623 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18624 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18626 else
18628 /* In the normal case we only need to save those registers
18629 which are call saved and which are used by this function. */
18630 for (reg = 0; reg <= 11; reg++)
18631 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18632 save_reg_mask |= (1 << reg);
18634 /* Handle the frame pointer as a special case. */
18635 if (frame_pointer_needed)
18636 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18638 /* If we aren't loading the PIC register,
18639 don't stack it even though it may be live. */
18640 if (flag_pic
18641 && !TARGET_SINGLE_PIC_BASE
18642 && arm_pic_register != INVALID_REGNUM
18643 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18644 || crtl->uses_pic_offset_table))
18645 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18647 /* The prologue will copy SP into R0, so save it. */
18648 if (IS_STACKALIGN (func_type))
18649 save_reg_mask |= 1;
18652 /* Save registers so the exception handler can modify them. */
18653 if (crtl->calls_eh_return)
18655 unsigned int i;
18657 for (i = 0; ; i++)
18659 reg = EH_RETURN_DATA_REGNO (i);
18660 if (reg == INVALID_REGNUM)
18661 break;
18662 save_reg_mask |= 1 << reg;
18666 return save_reg_mask;
18669 /* Return true if r3 is live at the start of the function. */
18671 static bool
18672 arm_r3_live_at_start_p (void)
18674 /* Just look at cfg info, which is still close enough to correct at this
18675 point. This gives false positives for broken functions that might use
18676 uninitialized data that happens to be allocated in r3, but who cares? */
18677 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18681 /* Compute the number of bytes used to store the static chain register on the
18682 stack, above the stack frame. We need to know this accurately to get the
18683 alignment of the rest of the stack frame correct. */
18685 static int
18686 arm_compute_static_chain_stack_bytes (void)
18688 /* See the defining assertion in arm_expand_prologue. */
18689 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18690 && IS_NESTED (arm_current_func_type ())
18691 && arm_r3_live_at_start_p ()
18692 && crtl->args.pretend_args_size == 0)
18693 return 4;
18695 return 0;
18698 /* Compute a bit mask of which registers need to be
18699 saved on the stack for the current function.
18700 This is used by arm_get_frame_offsets, which may add extra registers. */
18702 static unsigned long
18703 arm_compute_save_reg_mask (void)
18705 unsigned int save_reg_mask = 0;
18706 unsigned long func_type = arm_current_func_type ();
18707 unsigned int reg;
18709 if (IS_NAKED (func_type))
18710 /* This should never really happen. */
18711 return 0;
18713 /* If we are creating a stack frame, then we must save the frame pointer,
18714 IP (which will hold the old stack pointer), LR and the PC. */
18715 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18716 save_reg_mask |=
18717 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18718 | (1 << IP_REGNUM)
18719 | (1 << LR_REGNUM)
18720 | (1 << PC_REGNUM);
18722 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18724 /* Decide if we need to save the link register.
18725 Interrupt routines have their own banked link register,
18726 so they never need to save it.
18727 Otherwise if we do not use the link register we do not need to save
18728 it. If we are pushing other registers onto the stack however, we
18729 can save an instruction in the epilogue by pushing the link register
18730 now and then popping it back into the PC. This incurs extra memory
18731 accesses though, so we only do it when optimizing for size, and only
18732 if we know that we will not need a fancy return sequence. */
18733 if (df_regs_ever_live_p (LR_REGNUM)
18734 || (save_reg_mask
18735 && optimize_size
18736 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
18737 && !crtl->calls_eh_return))
18738 save_reg_mask |= 1 << LR_REGNUM;
18740 if (cfun->machine->lr_save_eliminated)
18741 save_reg_mask &= ~ (1 << LR_REGNUM);
18743 if (TARGET_REALLY_IWMMXT
18744 && ((bit_count (save_reg_mask)
18745 + ARM_NUM_INTS (crtl->args.pretend_args_size +
18746 arm_compute_static_chain_stack_bytes())
18747 ) % 2) != 0)
18749 /* The total number of registers that are going to be pushed
18750 onto the stack is odd. We need to ensure that the stack
18751 is 64-bit aligned before we start to save iWMMXt registers,
18752 and also before we start to create locals. (A local variable
18753 might be a double or long long which we will load/store using
18754 an iWMMXt instruction). Therefore we need to push another
18755 ARM register, so that the stack will be 64-bit aligned. We
18756 try to avoid using the arg registers (r0 - r3) as they might be
18757 used to pass values in a tail call. */
18758 for (reg = 4; reg <= 12; reg++)
18759 if ((save_reg_mask & (1 << reg)) == 0)
18760 break;
18762 if (reg <= 12)
18763 save_reg_mask |= (1 << reg);
18764 else
18766 cfun->machine->sibcall_blocked = 1;
18767 save_reg_mask |= (1 << 3);
18771 /* We may need to push an additional register for use initializing the
18772 PIC base register. */
18773 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
18774 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
18776 reg = thumb_find_work_register (1 << 4);
18777 if (!call_used_regs[reg])
18778 save_reg_mask |= (1 << reg);
18781 return save_reg_mask;
18785 /* Compute a bit mask of which registers need to be
18786 saved on the stack for the current function. */
18787 static unsigned long
18788 thumb1_compute_save_reg_mask (void)
18790 unsigned long mask;
18791 unsigned reg;
18793 mask = 0;
18794 for (reg = 0; reg < 12; reg ++)
18795 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
18796 mask |= 1 << reg;
18798 if (flag_pic
18799 && !TARGET_SINGLE_PIC_BASE
18800 && arm_pic_register != INVALID_REGNUM
18801 && crtl->uses_pic_offset_table)
18802 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18804 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18805 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18806 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18808 /* LR will also be pushed if any lo regs are pushed. */
18809 if (mask & 0xff || thumb_force_lr_save ())
18810 mask |= (1 << LR_REGNUM);
18812 /* Make sure we have a low work register if we need one.
18813 We will need one if we are going to push a high register,
18814 but we are not currently intending to push a low register. */
18815 if ((mask & 0xff) == 0
18816 && ((mask & 0x0f00) || TARGET_BACKTRACE))
18818 /* Use thumb_find_work_register to choose which register
18819 we will use. If the register is live then we will
18820 have to push it. Use LAST_LO_REGNUM as our fallback
18821 choice for the register to select. */
18822 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
18823 /* Make sure the register returned by thumb_find_work_register is
18824 not part of the return value. */
18825 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
18826 reg = LAST_LO_REGNUM;
18828 if (! call_used_regs[reg])
18829 mask |= 1 << reg;
18832 /* The 504 below is 8 bytes less than 512 because there are two possible
18833 alignment words. We can't tell here if they will be present or not so we
18834 have to play it safe and assume that they are. */
18835 if ((CALLER_INTERWORKING_SLOT_SIZE +
18836 ROUND_UP_WORD (get_frame_size ()) +
18837 crtl->outgoing_args_size) >= 504)
18839 /* This is the same as the code in thumb1_expand_prologue() which
18840 determines which register to use for stack decrement. */
18841 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
18842 if (mask & (1 << reg))
18843 break;
18845 if (reg > LAST_LO_REGNUM)
18847 /* Make sure we have a register available for stack decrement. */
18848 mask |= 1 << LAST_LO_REGNUM;
18852 return mask;
18856 /* Return the number of bytes required to save VFP registers. */
18857 static int
18858 arm_get_vfp_saved_size (void)
18860 unsigned int regno;
18861 int count;
18862 int saved;
18864 saved = 0;
18865 /* Space for saved VFP registers. */
18866 if (TARGET_HARD_FLOAT && TARGET_VFP)
18868 count = 0;
18869 for (regno = FIRST_VFP_REGNUM;
18870 regno < LAST_VFP_REGNUM;
18871 regno += 2)
18873 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
18874 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
18876 if (count > 0)
18878 /* Workaround ARM10 VFPr1 bug. */
18879 if (count == 2 && !arm_arch6)
18880 count++;
18881 saved += count * 8;
18883 count = 0;
18885 else
18886 count++;
18888 if (count > 0)
18890 if (count == 2 && !arm_arch6)
18891 count++;
18892 saved += count * 8;
18895 return saved;
18899 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18900 everything bar the final return instruction. If simple_return is true,
18901 then do not output the epilogue, because it has already been emitted in RTL. */
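/* For illustration: a function that pushed {r4, lr} normally returns with
   "pop {r4, pc}" (unified syntax), while a function with nothing to
   restore uses "bx lr" on architectures that have it and "mov pc, lr"
   otherwise.  */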
18902 const char *
18903 output_return_instruction (rtx operand, bool really_return, bool reverse,
18904 bool simple_return)
18906 char conditional[10];
18907 char instr[100];
18908 unsigned reg;
18909 unsigned long live_regs_mask;
18910 unsigned long func_type;
18911 arm_stack_offsets *offsets;
18913 func_type = arm_current_func_type ();
18915 if (IS_NAKED (func_type))
18916 return "";
18918 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
18920 /* If this function was declared non-returning, and we have
18921 found a tail call, then we have to trust that the called
18922 function won't return. */
18923 if (really_return)
18925 rtx ops[2];
18927 /* Otherwise, trap an attempted return by aborting. */
18928 ops[0] = operand;
18929 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
18930 : "abort");
18931 assemble_external_libcall (ops[1]);
18932 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
18935 return "";
18938 gcc_assert (!cfun->calls_alloca || really_return);
18940 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
18942 cfun->machine->return_used_this_function = 1;
18944 offsets = arm_get_frame_offsets ();
18945 live_regs_mask = offsets->saved_regs_mask;
18947 if (!simple_return && live_regs_mask)
18949 const char * return_reg;
18951 /* If we do not have any special requirements for function exit
18952 (e.g. interworking) then we can load the return address
18953 directly into the PC. Otherwise we must load it into LR. */
18954 if (really_return
18955 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
18956 return_reg = reg_names[PC_REGNUM];
18957 else
18958 return_reg = reg_names[LR_REGNUM];
18960 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
18962 /* There are three possible reasons for the IP register
18963 being saved: 1) a stack frame was created, in which case
18964 IP contains the old stack pointer, or 2) an ISR routine
18965 corrupted it, or 3) it was saved to align the stack on
18966 iWMMXt. In case 1, restore IP into SP, otherwise just
18967 restore IP. */
18968 if (frame_pointer_needed)
18970 live_regs_mask &= ~ (1 << IP_REGNUM);
18971 live_regs_mask |= (1 << SP_REGNUM);
18973 else
18974 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
18977 /* On some ARM architectures it is faster to use LDR rather than
18978 LDM to load a single register. On other architectures, the
18979 cost is the same. In 26 bit mode, or for exception handlers,
18980 we have to use LDM to load the PC so that the CPSR is also
18981 restored. */
18982 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
18983 if (live_regs_mask == (1U << reg))
18984 break;
18986 if (reg <= LAST_ARM_REGNUM
18987 && (reg != LR_REGNUM
18988 || ! really_return
18989 || ! IS_INTERRUPT (func_type)))
18991 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
18992 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
18994 else
18996 char *p;
18997 int first = 1;
18999 /* Generate the load multiple instruction to restore the
19000 registers. Note we can get here, even if
19001 frame_pointer_needed is true, but only if sp already
19002 points to the base of the saved core registers. */
19003 if (live_regs_mask & (1 << SP_REGNUM))
19005 unsigned HOST_WIDE_INT stack_adjust;
19007 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19008 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19010 if (stack_adjust && arm_arch5 && TARGET_ARM)
19011 if (TARGET_UNIFIED_ASM)
19012 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19013 else
19014 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19015 else
19017 /* If we can't use ldmib (SA110 bug),
19018 then try to pop r3 instead. */
19019 if (stack_adjust)
19020 live_regs_mask |= 1 << 3;
19022 if (TARGET_UNIFIED_ASM)
19023 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19024 else
19025 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19028 else
19029 if (TARGET_UNIFIED_ASM)
19030 sprintf (instr, "pop%s\t{", conditional);
19031 else
19032 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19034 p = instr + strlen (instr);
19036 for (reg = 0; reg <= SP_REGNUM; reg++)
19037 if (live_regs_mask & (1 << reg))
19039 int l = strlen (reg_names[reg]);
19041 if (first)
19042 first = 0;
19043 else
19045 memcpy (p, ", ", 2);
19046 p += 2;
19049 memcpy (p, "%|", 2);
19050 memcpy (p + 2, reg_names[reg], l);
19051 p += l + 2;
19054 if (live_regs_mask & (1 << LR_REGNUM))
19056 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19057 /* If returning from an interrupt, restore the CPSR. */
19058 if (IS_INTERRUPT (func_type))
19059 strcat (p, "^");
19061 else
19062 strcpy (p, "}");
19065 output_asm_insn (instr, & operand);
19067 /* See if we need to generate an extra instruction to
19068 perform the actual function return. */
19069 if (really_return
19070 && func_type != ARM_FT_INTERWORKED
19071 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19073 /* The return has already been handled
19074 by loading the LR into the PC. */
19075 return "";
19079 if (really_return)
19081 switch ((int) ARM_FUNC_TYPE (func_type))
19083 case ARM_FT_ISR:
19084 case ARM_FT_FIQ:
19085 /* ??? This is wrong for unified assembly syntax. */
19086 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19087 break;
19089 case ARM_FT_INTERWORKED:
19090 sprintf (instr, "bx%s\t%%|lr", conditional);
19091 break;
19093 case ARM_FT_EXCEPTION:
19094 /* ??? This is wrong for unified assembly syntax. */
19095 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19096 break;
19098 default:
19099 /* Use bx if it's available. */
19100 if (arm_arch5 || arm_arch4t)
19101 sprintf (instr, "bx%s\t%%|lr", conditional);
19102 else
19103 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19104 break;
19107 output_asm_insn (instr, & operand);
19110 return "";
19113 /* Write the function name into the code section, directly preceding
19114 the function prologue.
19116 Code will be output similar to this:
19118 .ascii "arm_poke_function_name", 0
19119 .align
19121 .word 0xff000000 + (t1 - t0)
19122 arm_poke_function_name
19123 mov ip, sp
19124 stmfd sp!, {fp, ip, lr, pc}
19125 sub fp, ip, #4
19127 When performing a stack backtrace, code can inspect the value
19128 of 'pc' stored at 'fp' + 0. If the trace function then looks
19129 at location pc - 12 and the top 8 bits are set, then we know
19130 that there is a function name embedded immediately preceding this
19131 location, whose length is given by ((pc[-3]) & ~0xff000000).
19133 We assume that pc is declared as a pointer to an unsigned long.
19135 It is of no benefit to output the function name if we are assembling
19136 a leaf function. These function types will not contain a stack
19137 backtrace structure; therefore it is not possible to determine the
19138 function name. */
19139 void
19140 arm_poke_function_name (FILE *stream, const char *name)
19142 unsigned long alignlength;
19143 unsigned long length;
19144 rtx x;
19146 length = strlen (name) + 1;
19147 alignlength = ROUND_UP_WORD (length);
19149 ASM_OUTPUT_ASCII (stream, name, length);
19150 ASM_OUTPUT_ALIGN (stream, 2);
19151 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19152 assemble_aligned_integer (UNITS_PER_WORD, x);
19155 /* Place some comments into the assembler stream
19156 describing the current function. */
19157 static void
19158 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19160 unsigned long func_type;
19162 /* ??? Do we want to print some of the below anyway? */
19163 if (TARGET_THUMB1)
19164 return;
19166 /* Sanity check. */
19167 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19169 func_type = arm_current_func_type ();
19171 switch ((int) ARM_FUNC_TYPE (func_type))
19173 default:
19174 case ARM_FT_NORMAL:
19175 break;
19176 case ARM_FT_INTERWORKED:
19177 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19178 break;
19179 case ARM_FT_ISR:
19180 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19181 break;
19182 case ARM_FT_FIQ:
19183 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19184 break;
19185 case ARM_FT_EXCEPTION:
19186 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19187 break;
19190 if (IS_NAKED (func_type))
19191 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19193 if (IS_VOLATILE (func_type))
19194 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19196 if (IS_NESTED (func_type))
19197 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19198 if (IS_STACKALIGN (func_type))
19199 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19201 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19202 crtl->args.size,
19203 crtl->args.pretend_args_size, frame_size);
19205 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19206 frame_pointer_needed,
19207 cfun->machine->uses_anonymous_args);
19209 if (cfun->machine->lr_save_eliminated)
19210 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19212 if (crtl->calls_eh_return)
19213 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19217 static void
19218 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19219 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19221 arm_stack_offsets *offsets;
19223 if (TARGET_THUMB1)
19225 int regno;
19227 /* Emit any call-via-reg trampolines that are needed for v4t support
19228 of call_reg and call_value_reg type insns. */
19229 for (regno = 0; regno < LR_REGNUM; regno++)
19231 rtx label = cfun->machine->call_via[regno];
19233 if (label != NULL)
19235 switch_to_section (function_section (current_function_decl));
19236 targetm.asm_out.internal_label (asm_out_file, "L",
19237 CODE_LABEL_NUMBER (label));
19238 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19242 /* ??? Probably not safe to set this here, since it assumes that a
19243 function will be emitted as assembly immediately after we generate
19244 RTL for it. This does not happen for inline functions. */
19245 cfun->machine->return_used_this_function = 0;
19247 else /* TARGET_32BIT */
19249 /* We need to take into account any stack-frame rounding. */
19250 offsets = arm_get_frame_offsets ();
19252 gcc_assert (!use_return_insn (FALSE, NULL)
19253 || (cfun->machine->return_used_this_function != 0)
19254 || offsets->saved_regs == offsets->outgoing_args
19255 || frame_pointer_needed);
19257 /* Reset the ARM-specific per-function variables. */
19258 after_arm_reorg = 0;
19262 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19263 STR and STRD. If an even number of registers are being pushed, one
19264 or more STRD patterns are created for each register pair. If an
19265 odd number of registers are pushed, emit an initial STR followed by
19266 as many STRD instructions as are needed. This works best when the
19267 stack is initially 64-bit aligned (the normal case), since it
19268 ensures that each STRD is also 64-bit aligned. */
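/* For illustration: pushing {r4, r5, r6} (an odd count) corresponds
   roughly to "str r4, [sp, #-12]!" followed by "strd r5, r6, [sp, #4]",
   so r4 ends up at the lowest address, just as with "push {r4-r6}".  */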
19269 static void
19270 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19272 int num_regs = 0;
19273 int i;
19274 int regno;
19275 rtx par = NULL_RTX;
19276 rtx dwarf = NULL_RTX;
19277 rtx tmp;
19278 bool first = true;
19280 num_regs = bit_count (saved_regs_mask);
19282 /* Must be at least one register to save, and can't save SP or PC. */
19283 gcc_assert (num_regs > 0 && num_regs <= 14);
19284 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19285 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19287 /* Create sequence for DWARF info. All the frame-related data for
19288 debugging is held in this wrapper. */
19289 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19291 /* Describe the stack adjustment. */
19292 tmp = gen_rtx_SET (VOIDmode,
19293 stack_pointer_rtx,
19294 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19295 RTX_FRAME_RELATED_P (tmp) = 1;
19296 XVECEXP (dwarf, 0, 0) = tmp;
19298 /* Find the first register. */
19299 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19302 i = 0;
19304 /* If there's an odd number of registers to push, start off by
19305 pushing a single register. This ensures that subsequent strd
19306 operations are dword aligned (assuming that SP was originally
19307 64-bit aligned). */
19308 if ((num_regs & 1) != 0)
19310 rtx reg, mem, insn;
19312 reg = gen_rtx_REG (SImode, regno);
19313 if (num_regs == 1)
19314 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19315 stack_pointer_rtx));
19316 else
19317 mem = gen_frame_mem (Pmode,
19318 gen_rtx_PRE_MODIFY
19319 (Pmode, stack_pointer_rtx,
19320 plus_constant (Pmode, stack_pointer_rtx,
19321 -4 * num_regs)));
19323 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19324 RTX_FRAME_RELATED_P (tmp) = 1;
19325 insn = emit_insn (tmp);
19326 RTX_FRAME_RELATED_P (insn) = 1;
19327 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19328 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19329 reg);
19330 RTX_FRAME_RELATED_P (tmp) = 1;
19331 i++;
19332 regno++;
19333 XVECEXP (dwarf, 0, i) = tmp;
19334 first = false;
19337 while (i < num_regs)
19338 if (saved_regs_mask & (1 << regno))
19340 rtx reg1, reg2, mem1, mem2;
19341 rtx tmp0, tmp1, tmp2;
19342 int regno2;
19344 /* Find the register to pair with this one. */
19345 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19346 regno2++)
19349 reg1 = gen_rtx_REG (SImode, regno);
19350 reg2 = gen_rtx_REG (SImode, regno2);
19352 if (first)
19354 rtx insn;
19356 first = false;
19357 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19358 stack_pointer_rtx,
19359 -4 * num_regs));
19360 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19361 stack_pointer_rtx,
19362 -4 * (num_regs - 1)));
19363 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19364 plus_constant (Pmode, stack_pointer_rtx,
19365 -4 * (num_regs)));
19366 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19367 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19368 RTX_FRAME_RELATED_P (tmp0) = 1;
19369 RTX_FRAME_RELATED_P (tmp1) = 1;
19370 RTX_FRAME_RELATED_P (tmp2) = 1;
19371 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19372 XVECEXP (par, 0, 0) = tmp0;
19373 XVECEXP (par, 0, 1) = tmp1;
19374 XVECEXP (par, 0, 2) = tmp2;
19375 insn = emit_insn (par);
19376 RTX_FRAME_RELATED_P (insn) = 1;
19377 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19379 else
19381 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19382 stack_pointer_rtx,
19383 4 * i));
19384 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19385 stack_pointer_rtx,
19386 4 * (i + 1)));
19387 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19388 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19389 RTX_FRAME_RELATED_P (tmp1) = 1;
19390 RTX_FRAME_RELATED_P (tmp2) = 1;
19391 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19392 XVECEXP (par, 0, 0) = tmp1;
19393 XVECEXP (par, 0, 1) = tmp2;
19394 emit_insn (par);
19397 /* Create unwind information. This is an approximation. */
19398 tmp1 = gen_rtx_SET (VOIDmode,
19399 gen_frame_mem (Pmode,
19400 plus_constant (Pmode,
19401 stack_pointer_rtx,
19402 4 * i)),
19403 reg1);
19404 tmp2 = gen_rtx_SET (VOIDmode,
19405 gen_frame_mem (Pmode,
19406 plus_constant (Pmode,
19407 stack_pointer_rtx,
19408 4 * (i + 1))),
19409 reg2);
19411 RTX_FRAME_RELATED_P (tmp1) = 1;
19412 RTX_FRAME_RELATED_P (tmp2) = 1;
19413 XVECEXP (dwarf, 0, i + 1) = tmp1;
19414 XVECEXP (dwarf, 0, i + 2) = tmp2;
19415 i += 2;
19416 regno = regno2 + 1;
19418 else
19419 regno++;
19421 return;
19424 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19425 whenever possible, otherwise it emits single-word stores. The first store
19426 also allocates stack space for all saved registers, using writeback with
19427 post-addressing mode. All other stores use offset addressing. If no STRD
19428 can be emitted, this function emits a sequence of single-word stores,
19429 and not an STM as before, because single-word stores provide more freedom
19430 for scheduling and can be turned into an STM by peephole optimizations. */
19431 static void
19432 arm_emit_strd_push (unsigned long saved_regs_mask)
19434 int num_regs = 0;
19435 int i, j, dwarf_index = 0;
19436 int offset = 0;
19437 rtx dwarf = NULL_RTX;
19438 rtx insn = NULL_RTX;
19439 rtx tmp, mem;
19441 /* TODO: More efficient code can be emitted by changing the
19442 layout, e.g., first push all pairs that can use STRD to keep the
19443 stack aligned, and then push all other registers. */
19444 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19445 if (saved_regs_mask & (1 << i))
19446 num_regs++;
19448 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19449 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19450 gcc_assert (num_regs > 0);
19452 /* Create sequence for DWARF info. */
19453 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19455 /* For dwarf info, we generate an explicit stack update. */
19456 tmp = gen_rtx_SET (VOIDmode,
19457 stack_pointer_rtx,
19458 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19459 RTX_FRAME_RELATED_P (tmp) = 1;
19460 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19462 /* Save registers. */
19463 offset = - 4 * num_regs;
19464 j = 0;
19465 while (j <= LAST_ARM_REGNUM)
19466 if (saved_regs_mask & (1 << j))
19468 if ((j % 2 == 0)
19469 && (saved_regs_mask & (1 << (j + 1))))
19471 /* The current register and the next register form a register pair
19472 for which STRD can be generated. */
19473 if (offset < 0)
19475 /* Allocate stack space for all saved registers. */
19476 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19477 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19478 mem = gen_frame_mem (DImode, tmp);
19479 offset = 0;
19481 else if (offset > 0)
19482 mem = gen_frame_mem (DImode,
19483 plus_constant (Pmode,
19484 stack_pointer_rtx,
19485 offset));
19486 else
19487 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19489 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19490 RTX_FRAME_RELATED_P (tmp) = 1;
19491 tmp = emit_insn (tmp);
19493 /* Record the first store insn. */
19494 if (dwarf_index == 1)
19495 insn = tmp;
19497 /* Generate dwarf info. */
19498 mem = gen_frame_mem (SImode,
19499 plus_constant (Pmode,
19500 stack_pointer_rtx,
19501 offset));
19502 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19503 RTX_FRAME_RELATED_P (tmp) = 1;
19504 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19506 mem = gen_frame_mem (SImode,
19507 plus_constant (Pmode,
19508 stack_pointer_rtx,
19509 offset + 4));
19510 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19511 RTX_FRAME_RELATED_P (tmp) = 1;
19512 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19514 offset += 8;
19515 j += 2;
19517 else
19519 /* Emit a single word store. */
19520 if (offset < 0)
19522 /* Allocate stack space for all saved registers. */
19523 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19524 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19525 mem = gen_frame_mem (SImode, tmp);
19526 offset = 0;
19528 else if (offset > 0)
19529 mem = gen_frame_mem (SImode,
19530 plus_constant (Pmode,
19531 stack_pointer_rtx,
19532 offset));
19533 else
19534 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19536 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19537 RTX_FRAME_RELATED_P (tmp) = 1;
19538 tmp = emit_insn (tmp);
19540 /* Record the first store insn. */
19541 if (dwarf_index == 1)
19542 insn = tmp;
19544 /* Generate dwarf info. */
19545 mem = gen_frame_mem (SImode,
19546 plus_constant(Pmode,
19547 stack_pointer_rtx,
19548 offset));
19549 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19550 RTX_FRAME_RELATED_P (tmp) = 1;
19551 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19553 offset += 4;
19554 j += 1;
19557 else
19558 j++;
19560 /* Attach dwarf info to the first insn we generate. */
19561 gcc_assert (insn != NULL_RTX);
19562 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19563 RTX_FRAME_RELATED_P (insn) = 1;
19566 /* Generate and emit an insn that we will recognize as a push_multi.
19567 Unfortunately, since this insn does not reflect very well the actual
19568 semantics of the operation, we need to annotate the insn for the benefit
19569 of DWARF2 frame unwind information. */
19570 static rtx
19571 emit_multi_reg_push (unsigned long mask)
19573 int num_regs = 0;
19574 int num_dwarf_regs;
19575 int i, j;
19576 rtx par;
19577 rtx dwarf;
19578 int dwarf_par_index;
19579 rtx tmp, reg;
19581 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19582 if (mask & (1 << i))
19583 num_regs++;
19585 gcc_assert (num_regs && num_regs <= 16);
19587 /* We don't record the PC in the dwarf frame information. */
19588 num_dwarf_regs = num_regs;
19589 if (mask & (1 << PC_REGNUM))
19590 num_dwarf_regs--;
19592 /* For the body of the insn we are going to generate an UNSPEC in
19593 parallel with several USEs. This allows the insn to be recognized
19594 by the push_multi pattern in the arm.md file.
19596 The body of the insn looks something like this:
19598 (parallel [
19599 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19600 (const_int:SI <num>)))
19601 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19602 (use (reg:SI XX))
19603 (use (reg:SI YY))
19607 For the frame note however, we try to be more explicit and actually
19608 show each register being stored into the stack frame, plus a (single)
19609 decrement of the stack pointer. We do it this way in order to be
19610 friendly to the stack unwinding code, which only wants to see a single
19611 stack decrement per instruction. The RTL we generate for the note looks
19612 something like this:
19614 (sequence [
19615 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19616 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19617 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19618 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19622 FIXME: In an ideal world the PRE_MODIFY would not exist and
19623 instead we'd have a parallel expression detailing all
19624 the stores to the various memory addresses so that debug
19625 information is more up-to-date. Remember however while writing
19626 this to take care of the constraints with the push instruction.
19628 Note also that this has to be taken care of for the VFP registers.
19630 For more see PR43399. */
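/* As an illustrative example (registers chosen arbitrarily), a MASK covering
   {r4, r5, lr} becomes a single "push {r4, r5, lr}" insn, while its
   REG_FRAME_RELATED_EXPR note is roughly the sequence

	(set sp (plus sp (const_int -12)))
	(set (mem sp)                      (reg r4))
	(set (mem (plus sp (const_int 4))) (reg r5))
	(set (mem (plus sp (const_int 8))) (reg lr))

   i.e. one stack decrement followed by one word store per register
   (PC, if pushed, is omitted from the note).  */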
19632 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19633 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19634 dwarf_par_index = 1;
19636 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19638 if (mask & (1 << i))
19640 reg = gen_rtx_REG (SImode, i);
19642 XVECEXP (par, 0, 0)
19643 = gen_rtx_SET (VOIDmode,
19644 gen_frame_mem
19645 (BLKmode,
19646 gen_rtx_PRE_MODIFY (Pmode,
19647 stack_pointer_rtx,
19648 plus_constant
19649 (Pmode, stack_pointer_rtx,
19650 -4 * num_regs))
19652 gen_rtx_UNSPEC (BLKmode,
19653 gen_rtvec (1, reg),
19654 UNSPEC_PUSH_MULT));
19656 if (i != PC_REGNUM)
19658 tmp = gen_rtx_SET (VOIDmode,
19659 gen_frame_mem (SImode, stack_pointer_rtx),
19660 reg);
19661 RTX_FRAME_RELATED_P (tmp) = 1;
19662 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
19663 dwarf_par_index++;
19666 break;
19670 for (j = 1, i++; j < num_regs; i++)
19672 if (mask & (1 << i))
19674 reg = gen_rtx_REG (SImode, i);
19676 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19678 if (i != PC_REGNUM)
19681 = gen_rtx_SET (VOIDmode,
19682 gen_frame_mem
19683 (SImode,
19684 plus_constant (Pmode, stack_pointer_rtx,
19685 4 * j)),
19686 reg);
19687 RTX_FRAME_RELATED_P (tmp) = 1;
19688 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19691 j++;
19695 par = emit_insn (par);
19697 tmp = gen_rtx_SET (VOIDmode,
19698 stack_pointer_rtx,
19699 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19700 RTX_FRAME_RELATED_P (tmp) = 1;
19701 XVECEXP (dwarf, 0, 0) = tmp;
19703 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19705 return par;
19708 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19709 SIZE is the offset to be adjusted.
19710 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19711 static void
19712 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19714 rtx dwarf;
19716 RTX_FRAME_RELATED_P (insn) = 1;
19717 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19718 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19721 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19722 SAVED_REGS_MASK shows which registers need to be restored.
19724 Unfortunately, since this insn does not reflect very well the actual
19725 semantics of the operation, we need to annotate the insn for the benefit
19726 of DWARF2 frame unwind information. */
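/* Illustrative example (registers chosen arbitrarily): SAVED_REGS_MASK
   covering {r4, r5, pc} becomes a "pop {r4, r5, pc}"-style parallel holding
   a return, an SP update of +12 and one load per register, with
   REG_CFA_RESTORE notes attached for r4 and r5 only, since PC is not
   described in the unwind info.  */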
19727 static void
19728 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
19730 int num_regs = 0;
19731 int i, j;
19732 rtx par;
19733 rtx dwarf = NULL_RTX;
19734 rtx tmp, reg;
19735 bool return_in_pc;
19736 int offset_adj;
19737 int emit_update;
19739 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19740 offset_adj = return_in_pc ? 1 : 0;
19741 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19742 if (saved_regs_mask & (1 << i))
19743 num_regs++;
19745 gcc_assert (num_regs && num_regs <= 16);
19747 /* If SP is in reglist, then we don't emit SP update insn. */
19748 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
19750 /* The parallel needs to hold num_regs SETs
19751 and one SET for the stack update. */
19752 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
19754 if (return_in_pc)
19756 tmp = ret_rtx;
19757 XVECEXP (par, 0, 0) = tmp;
19760 if (emit_update)
19762 /* Increment the stack pointer, based on there being
19763 num_regs 4-byte registers to restore. */
19764 tmp = gen_rtx_SET (VOIDmode,
19765 stack_pointer_rtx,
19766 plus_constant (Pmode,
19767 stack_pointer_rtx,
19768 4 * num_regs));
19769 RTX_FRAME_RELATED_P (tmp) = 1;
19770 XVECEXP (par, 0, offset_adj) = tmp;
19773 /* Now restore every reg, which may include PC. */
19774 for (j = 0, i = 0; j < num_regs; i++)
19775 if (saved_regs_mask & (1 << i))
19777 reg = gen_rtx_REG (SImode, i);
19778 if ((num_regs == 1) && emit_update && !return_in_pc)
19780 /* Emit single load with writeback. */
19781 tmp = gen_frame_mem (SImode,
19782 gen_rtx_POST_INC (Pmode,
19783 stack_pointer_rtx));
19784 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
19785 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19786 return;
19789 tmp = gen_rtx_SET (VOIDmode,
19790 reg,
19791 gen_frame_mem
19792 (SImode,
19793 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
19794 RTX_FRAME_RELATED_P (tmp) = 1;
19795 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
19797 /* We need to maintain a sequence for DWARF info too. Since the DWARF
19798 info should not include PC, skip it. */
19799 if (i != PC_REGNUM)
19800 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19802 j++;
19805 if (return_in_pc)
19806 par = emit_jump_insn (par);
19807 else
19808 par = emit_insn (par);
19810 REG_NOTES (par) = dwarf;
19811 if (!return_in_pc)
19812 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
19813 stack_pointer_rtx, stack_pointer_rtx);
19816 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19817 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19819 Unfortunately, since this insn does not reflect very well the actual
19820 semantics of the operation, we need to annotate the insn for the benefit
19821 of DWARF2 frame unwind information. */
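/* Illustrative example (register choice arbitrary): restoring four
   D-registers, say d8-d11, from BASE_REG becomes a single VLDM/FLDMD-style
   parallel: one update of BASE_REG by +32 plus four DFmode loads, each with
   a REG_CFA_RESTORE note.  A request for more than 16 D-registers is split
   into two such instructions by the recursion below.  */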
19822 static void
19823 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
19825 int i, j;
19826 rtx par;
19827 rtx dwarf = NULL_RTX;
19828 rtx tmp, reg;
19830 gcc_assert (num_regs && num_regs <= 32);
19832 /* Work around the ARM10 VFPr1 bug. */
19833 if (num_regs == 2 && !arm_arch6)
19835 if (first_reg == 15)
19836 first_reg--;
19838 num_regs++;
19841 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19842 there could be up to 32 D-registers to restore.
19843 If there are more than 16 D-registers, make two recursive calls,
19844 each of which emits one pop_multi instruction. */
19845 if (num_regs > 16)
19847 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
19848 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
19849 return;
19852 /* The parallel needs to hold num_regs SETs
19853 and one SET for the stack update. */
19854 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
19856 /* Increment the stack pointer, based on there being
19857 num_regs 8-byte registers to restore. */
19858 tmp = gen_rtx_SET (VOIDmode,
19859 base_reg,
19860 plus_constant (Pmode, base_reg, 8 * num_regs));
19861 RTX_FRAME_RELATED_P (tmp) = 1;
19862 XVECEXP (par, 0, 0) = tmp;
19864 /* Now show every reg that will be restored, using a SET for each. */
19865 for (j = 0, i=first_reg; j < num_regs; i += 2)
19867 reg = gen_rtx_REG (DFmode, i);
19869 tmp = gen_rtx_SET (VOIDmode,
19870 reg,
19871 gen_frame_mem
19872 (DFmode,
19873 plus_constant (Pmode, base_reg, 8 * j)));
19874 RTX_FRAME_RELATED_P (tmp) = 1;
19875 XVECEXP (par, 0, j + 1) = tmp;
19877 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19879 j++;
19882 par = emit_insn (par);
19883 REG_NOTES (par) = dwarf;
19885 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
19886 base_reg, base_reg);
19889 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
19890 even number of registers is being popped, multiple LDRD patterns are created
19891 for all register pairs. If an odd number of registers is popped, the last
19892 register is loaded using an LDR pattern. */
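/* Illustrative example (registers chosen arbitrarily): popping
   {r4, r5, r6, r7} gives roughly

	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16

   whereas popping {r4, r5, r6} would use one LDRD, the stack update, and
   then a final "ldr r6, [sp], #4".  */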
19893 static void
19894 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
19896 int num_regs = 0;
19897 int i, j;
19898 rtx par = NULL_RTX;
19899 rtx dwarf = NULL_RTX;
19900 rtx tmp, reg, tmp1;
19901 bool return_in_pc;
19903 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19904 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19905 if (saved_regs_mask & (1 << i))
19906 num_regs++;
19908 gcc_assert (num_regs && num_regs <= 16);
19910 /* We cannot generate LDRD for PC, so PC is not counted here. When PC is
19911 to be popped: if the remaining count is odd, the last register and PC
19912 are popped together with a pop_multi pattern; if it is even, PC is
19913 loaded on its own by an LDR that returns. */
19914 if (return_in_pc)
19915 num_regs--;
19917 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19919 /* Var j iterates over the register numbers to find the registers in
19920 saved_regs_mask, while var i is the index of each saved register in the
19921 stack frame. A PARALLEL RTX holding a register pair is created here so
19922 that the LDRD pattern can be matched. As PC is always the last register
19923 to be popped, and num_regs has already been decremented for it, we do
19924 not have to worry about PC in this loop. */
19925 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
19926 if (saved_regs_mask & (1 << j))
19928 /* Create RTX for memory load. */
19929 reg = gen_rtx_REG (SImode, j);
19930 tmp = gen_rtx_SET (SImode,
19931 reg,
19932 gen_frame_mem (SImode,
19933 plus_constant (Pmode,
19934 stack_pointer_rtx, 4 * i)));
19935 RTX_FRAME_RELATED_P (tmp) = 1;
19937 if (i % 2 == 0)
19939 /* When saved-register index (i) is even, the RTX to be emitted is
19940 yet to be created. Hence create it first. The LDRD pattern we
19941 are generating is :
19942 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19943 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19944 where target registers need not be consecutive. */
19945 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19946 dwarf = NULL_RTX;
19949 /* The ith register is added to the PARALLEL RTX: if i is even, reg_i
19950 becomes the 0th element, and if i is odd, reg_i becomes the 1st element
19951 of the LDRD pattern shown above. */
19952 XVECEXP (par, 0, (i % 2)) = tmp;
19953 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19955 if ((i % 2) == 1)
19957 /* When saved-register index (i) is odd, RTXs for both registers of the
19958 LDRD pattern given above have been generated, and the pattern can be
19959 emitted now. */
19960 par = emit_insn (par);
19961 REG_NOTES (par) = dwarf;
19962 RTX_FRAME_RELATED_P (par) = 1;
19965 i++;
19968 /* If num_regs is odd and return_in_pc is false, or num_regs is even and
19969 return_in_pc is true, the last register (which may be PC) is popped with
19970 a single LDR. Hence, adjust the stack pointer first and then use an LDR
19971 with post increment. */
19973 /* Increment the stack pointer, based on there being
19974 num_regs 4-byte registers to restore. */
19975 tmp = gen_rtx_SET (VOIDmode,
19976 stack_pointer_rtx,
19977 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
19978 RTX_FRAME_RELATED_P (tmp) = 1;
19979 tmp = emit_insn (tmp);
19980 if (!return_in_pc)
19982 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
19983 stack_pointer_rtx, stack_pointer_rtx);
19986 dwarf = NULL_RTX;
19988 if (((num_regs % 2) == 1 && !return_in_pc)
19989 || ((num_regs % 2) == 0 && return_in_pc))
19991 /* Scan for the single register to be popped. Skip until the saved
19992 register is found. */
19993 for (; (saved_regs_mask & (1 << j)) == 0; j++);
19995 /* Gen LDR with post increment here. */
19996 tmp1 = gen_rtx_MEM (SImode,
19997 gen_rtx_POST_INC (SImode,
19998 stack_pointer_rtx));
19999 set_mem_alias_set (tmp1, get_frame_alias_set ());
20001 reg = gen_rtx_REG (SImode, j);
20002 tmp = gen_rtx_SET (SImode, reg, tmp1);
20003 RTX_FRAME_RELATED_P (tmp) = 1;
20004 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20006 if (return_in_pc)
20008 /* If return_in_pc, j must be PC_REGNUM. */
20009 gcc_assert (j == PC_REGNUM);
20010 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20011 XVECEXP (par, 0, 0) = ret_rtx;
20012 XVECEXP (par, 0, 1) = tmp;
20013 par = emit_jump_insn (par);
20015 else
20017 par = emit_insn (tmp);
20018 REG_NOTES (par) = dwarf;
20019 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20020 stack_pointer_rtx, stack_pointer_rtx);
20024 else if ((num_regs % 2) == 1 && return_in_pc)
20026 /* There are 2 registers to be popped. So, generate the pattern
20027 pop_multiple_with_stack_update_and_return to pop in PC. */
20028 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20031 return;
20034 /* LDRD in ARM mode needs consecutive registers as operands. This function
20035 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20036 offset addressing and then generates one separate stack update. This provides
20037 more scheduling freedom, compared to writeback on every load. However,
20038 if the function returns using load into PC directly
20039 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20040 before the last load. TODO: Add a peephole optimization to recognize
20041 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20042 peephole optimization to merge the load at stack-offset zero
20043 with the stack update instruction using load with writeback
20044 in post-index addressing mode. */
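/* Illustrative example (registers chosen arbitrarily): SAVED_REGS_MASK
   covering {r4, r5, r6, lr} would emit roughly

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	ldr	lr, [sp, #12]
	add	sp, sp, #16

   since ARM-mode LDRD needs an even/odd consecutive register pair, so r6
   and lr fall back to single-word loads.  */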
20045 static void
20046 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20048 int j = 0;
20049 int offset = 0;
20050 rtx par = NULL_RTX;
20051 rtx dwarf = NULL_RTX;
20052 rtx tmp, mem;
20054 /* Restore saved registers. */
20055 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20056 j = 0;
20057 while (j <= LAST_ARM_REGNUM)
20058 if (saved_regs_mask & (1 << j))
20060 if ((j % 2) == 0
20061 && (saved_regs_mask & (1 << (j + 1)))
20062 && (j + 1) != PC_REGNUM)
20064 /* Current register and next register form register pair for which
20065 LDRD can be generated. PC is always the last register popped, and
20066 we handle it separately. */
20067 if (offset > 0)
20068 mem = gen_frame_mem (DImode,
20069 plus_constant (Pmode,
20070 stack_pointer_rtx,
20071 offset));
20072 else
20073 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20075 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20076 tmp = emit_insn (tmp);
20077 RTX_FRAME_RELATED_P (tmp) = 1;
20079 /* Generate dwarf info. */
20081 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20082 gen_rtx_REG (SImode, j),
20083 NULL_RTX);
20084 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20085 gen_rtx_REG (SImode, j + 1),
20086 dwarf);
20088 REG_NOTES (tmp) = dwarf;
20090 offset += 8;
20091 j += 2;
20093 else if (j != PC_REGNUM)
20095 /* Emit a single word load. */
20096 if (offset > 0)
20097 mem = gen_frame_mem (SImode,
20098 plus_constant (Pmode,
20099 stack_pointer_rtx,
20100 offset));
20101 else
20102 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20104 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20105 tmp = emit_insn (tmp);
20106 RTX_FRAME_RELATED_P (tmp) = 1;
20108 /* Generate dwarf info. */
20109 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20110 gen_rtx_REG (SImode, j),
20111 NULL_RTX);
20113 offset += 4;
20114 j += 1;
20116 else /* j == PC_REGNUM */
20117 j++;
20119 else
20120 j++;
20122 /* Update the stack. */
20123 if (offset > 0)
20125 tmp = gen_rtx_SET (Pmode,
20126 stack_pointer_rtx,
20127 plus_constant (Pmode,
20128 stack_pointer_rtx,
20129 offset));
20130 tmp = emit_insn (tmp);
20131 arm_add_cfa_adjust_cfa_note (tmp, offset,
20132 stack_pointer_rtx, stack_pointer_rtx);
20133 offset = 0;
20136 if (saved_regs_mask & (1 << PC_REGNUM))
20138 /* Only PC is to be popped. */
20139 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20140 XVECEXP (par, 0, 0) = ret_rtx;
20141 tmp = gen_rtx_SET (SImode,
20142 gen_rtx_REG (SImode, PC_REGNUM),
20143 gen_frame_mem (SImode,
20144 gen_rtx_POST_INC (SImode,
20145 stack_pointer_rtx)));
20146 RTX_FRAME_RELATED_P (tmp) = 1;
20147 XVECEXP (par, 0, 1) = tmp;
20148 par = emit_jump_insn (par);
20150 /* Generate dwarf info. */
20151 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20152 gen_rtx_REG (SImode, PC_REGNUM),
20153 NULL_RTX);
20154 REG_NOTES (par) = dwarf;
20155 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20156 stack_pointer_rtx, stack_pointer_rtx);
20160 /* Calculate the size of the return value that is passed in registers. */
20161 static unsigned
20162 arm_size_return_regs (void)
20164 enum machine_mode mode;
20166 if (crtl->return_rtx != 0)
20167 mode = GET_MODE (crtl->return_rtx);
20168 else
20169 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20171 return GET_MODE_SIZE (mode);
20174 /* Return true if the current function needs to save/restore LR. */
20175 static bool
20176 thumb_force_lr_save (void)
20178 return !cfun->machine->lr_save_eliminated
20179 && (!leaf_function_p ()
20180 || thumb_far_jump_used_p ()
20181 || df_regs_ever_live_p (LR_REGNUM));
20184 /* We do not know whether r3 will be available, because an
20185 indirect tail call may be happening in this particular
20186 case. */
20187 static bool
20188 is_indirect_tailcall_p (rtx call)
20190 rtx pat = PATTERN (call);
20192 /* Indirect tail call. */
20193 pat = XVECEXP (pat, 0, 0);
20194 if (GET_CODE (pat) == SET)
20195 pat = SET_SRC (pat);
20197 pat = XEXP (XEXP (pat, 0), 0);
20198 return REG_P (pat);
20201 /* Return true if r3 is used by any of the tail call insns in the
20202 current function. */
20203 static bool
20204 any_sibcall_could_use_r3 (void)
20206 edge_iterator ei;
20207 edge e;
20209 if (!crtl->tail_call_emit)
20210 return false;
20211 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20212 if (e->flags & EDGE_SIBCALL)
20214 rtx call = BB_END (e->src);
20215 if (!CALL_P (call))
20216 call = prev_nonnote_nondebug_insn (call);
20217 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20218 if (find_regno_fusage (call, USE, 3)
20219 || is_indirect_tailcall_p (call))
20220 return true;
20222 return false;
20226 /* Compute the distance from register FROM to register TO.
20227 These can be the arg pointer (26), the soft frame pointer (25),
20228 the stack pointer (13) or the hard frame pointer (11).
20229 In Thumb mode r7 is used as the hard frame pointer, if needed.
20230 Typical stack layout looks like this:
20232        old stack pointer -> |    |
20233                              ----
20234                             |    | \
20235                             |    |   saved arguments for
20236                             |    |   vararg functions
20237                             |    | /
20239    hard FP & arg pointer -> |    | \
20240                             |    |   stack
20241                             |    |   frame
20242                             |    | /
20244                             |    | \
20245                             |    |   call saved
20246                             |    |   registers
20247       soft frame pointer -> |    | /
20249                             |    | \
20250                             |    |   local
20251                             |    |   variables
20252      locals base pointer -> |    | /
20254                             |    | \
20255                             |    |   outgoing
20256                             |    |   arguments
20257    current stack pointer -> |    | /
20260 For a given function some or all of these stack components
20261 may not be needed, giving rise to the possibility of
20262 eliminating some of the registers.
20264 The values returned by this function must reflect the behavior
20265 of arm_expand_prologue() and arm_compute_save_reg_mask().
20267 The sign of the number returned reflects the direction of stack
20268 growth, so the values are positive for all eliminations except
20269 from the soft frame pointer to the hard frame pointer.
20271 SFP may point just inside the local variables block to ensure correct
20272 alignment. */
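/* As an illustrative example (values invented for the sketch): if
   arm_get_frame_offsets() below computes saved_args == 0, frame == 4,
   saved_regs == 16, soft_frame == 16 and outgoing_args == 24, then
   eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM gives
   24 - (0 + 4) == 20, FRAME_POINTER_REGNUM into STACK_POINTER_REGNUM
   gives 24 - 16 == 8, and FRAME_POINTER_REGNUM into
   ARM_HARD_FRAME_POINTER_REGNUM gives 4 - 16 == -12, the one negative
   case mentioned above.  */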
20275 /* Calculate stack offsets. These are used to calculate register elimination
20276 offsets and in prologue/epilogue code. Also calculates which registers
20277 should be saved. */
20279 static arm_stack_offsets *
20280 arm_get_frame_offsets (void)
20282 struct arm_stack_offsets *offsets;
20283 unsigned long func_type;
20284 int leaf;
20285 int saved;
20286 int core_saved;
20287 HOST_WIDE_INT frame_size;
20288 int i;
20290 offsets = &cfun->machine->stack_offsets;
20292 /* We need to know if we are a leaf function. Unfortunately, it
20293 is possible to be called after start_sequence has been called,
20294 which causes get_insns to return the insns for the sequence,
20295 not the function, which will cause leaf_function_p to return
20296 the incorrect result. To work around this we cache the computed frame
20298 size: we only need to know about leaf functions once reload has completed,
20299 and the frame size cannot be changed after that time, so we can safely
20300 use the cached value. */
20302 if (reload_completed)
20303 return offsets;
20305 /* Initially this is the size of the local variables. It will be translated
20306 into an offset once we have determined the size of preceding data. */
20307 frame_size = ROUND_UP_WORD (get_frame_size ());
20309 leaf = leaf_function_p ();
20311 /* Space for variadic functions. */
20312 offsets->saved_args = crtl->args.pretend_args_size;
20314 /* In Thumb mode this is incorrect, but never used. */
20315 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
20316 arm_compute_static_chain_stack_bytes();
20318 if (TARGET_32BIT)
20320 unsigned int regno;
20322 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20323 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20324 saved = core_saved;
20326 /* We know that SP will be doubleword aligned on entry, and we must
20327 preserve that condition at any subroutine call. We also require the
20328 soft frame pointer to be doubleword aligned. */
20330 if (TARGET_REALLY_IWMMXT)
20332 /* Check for the call-saved iWMMXt registers. */
20333 for (regno = FIRST_IWMMXT_REGNUM;
20334 regno <= LAST_IWMMXT_REGNUM;
20335 regno++)
20336 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20337 saved += 8;
20340 func_type = arm_current_func_type ();
20341 /* Space for saved VFP registers. */
20342 if (! IS_VOLATILE (func_type)
20343 && TARGET_HARD_FLOAT && TARGET_VFP)
20344 saved += arm_get_vfp_saved_size ();
20346 else /* TARGET_THUMB1 */
20348 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20349 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20350 saved = core_saved;
20351 if (TARGET_BACKTRACE)
20352 saved += 16;
20355 /* Saved registers include the stack frame. */
20356 offsets->saved_regs = offsets->saved_args + saved +
20357 arm_compute_static_chain_stack_bytes();
20358 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20359 /* A leaf function does not need any stack alignment if it has nothing
20360 on the stack. */
20361 if (leaf && frame_size == 0
20362 /* However if it calls alloca(), we have a dynamically allocated
20363 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20364 && ! cfun->calls_alloca)
20366 offsets->outgoing_args = offsets->soft_frame;
20367 offsets->locals_base = offsets->soft_frame;
20368 return offsets;
20371 /* Ensure SFP has the correct alignment. */
20372 if (ARM_DOUBLEWORD_ALIGN
20373 && (offsets->soft_frame & 7))
20375 offsets->soft_frame += 4;
20376 /* Try to align stack by pushing an extra reg. Don't bother doing this
20377 when there is a stack frame as the alignment will be rolled into
20378 the normal stack adjustment. */
20379 if (frame_size + crtl->outgoing_args_size == 0)
20381 int reg = -1;
20383 /* If it is safe to use r3, then do so. This sometimes
20384 generates better code on Thumb-2 by avoiding the need to
20385 use 32-bit push/pop instructions. */
20386 if (! any_sibcall_could_use_r3 ()
20387 && arm_size_return_regs () <= 12
20388 && (offsets->saved_regs_mask & (1 << 3)) == 0
20389 && (TARGET_THUMB2
20390 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20392 reg = 3;
20394 else
20395 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20397 /* Avoid fixed registers; they may be changed at
20398 arbitrary times so it's unsafe to restore them
20399 during the epilogue. */
20400 if (!fixed_regs[i]
20401 && (offsets->saved_regs_mask & (1 << i)) == 0)
20403 reg = i;
20404 break;
20408 if (reg != -1)
20410 offsets->saved_regs += 4;
20411 offsets->saved_regs_mask |= (1 << reg);
20416 offsets->locals_base = offsets->soft_frame + frame_size;
20417 offsets->outgoing_args = (offsets->locals_base
20418 + crtl->outgoing_args_size);
20420 if (ARM_DOUBLEWORD_ALIGN)
20422 /* Ensure SP remains doubleword aligned. */
20423 if (offsets->outgoing_args & 7)
20424 offsets->outgoing_args += 4;
20425 gcc_assert (!(offsets->outgoing_args & 7));
20428 return offsets;
20432 /* Calculate the relative offsets for the different stack pointers. Positive
20433 offsets are in the direction of stack growth. */
20435 HOST_WIDE_INT
20436 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20438 arm_stack_offsets *offsets;
20440 offsets = arm_get_frame_offsets ();
20442 /* OK, now we have enough information to compute the distances.
20443 There must be an entry in these switch tables for each pair
20444 of registers in ELIMINABLE_REGS, even if some of the entries
20445 seem to be redundant or useless. */
20446 switch (from)
20448 case ARG_POINTER_REGNUM:
20449 switch (to)
20451 case THUMB_HARD_FRAME_POINTER_REGNUM:
20452 return 0;
20454 case FRAME_POINTER_REGNUM:
20455 /* This is the reverse of the soft frame pointer
20456 to hard frame pointer elimination below. */
20457 return offsets->soft_frame - offsets->saved_args;
20459 case ARM_HARD_FRAME_POINTER_REGNUM:
20460 /* This is only non-zero in the case where the static chain register
20461 is stored above the frame. */
20462 return offsets->frame - offsets->saved_args - 4;
20464 case STACK_POINTER_REGNUM:
20465 /* If nothing has been pushed on the stack at all
20466 then this will return -4. This *is* correct! */
20467 return offsets->outgoing_args - (offsets->saved_args + 4);
20469 default:
20470 gcc_unreachable ();
20472 gcc_unreachable ();
20474 case FRAME_POINTER_REGNUM:
20475 switch (to)
20477 case THUMB_HARD_FRAME_POINTER_REGNUM:
20478 return 0;
20480 case ARM_HARD_FRAME_POINTER_REGNUM:
20481 /* The hard frame pointer points to the top entry in the
20482 stack frame. The soft frame pointer points to the bottom entry
20483 in the stack frame. If there is no stack frame at all,
20484 then they are identical. */
20486 return offsets->frame - offsets->soft_frame;
20488 case STACK_POINTER_REGNUM:
20489 return offsets->outgoing_args - offsets->soft_frame;
20491 default:
20492 gcc_unreachable ();
20494 gcc_unreachable ();
20496 default:
20497 /* You cannot eliminate from the stack pointer.
20498 In theory you could eliminate from the hard frame
20499 pointer to the stack pointer, but this will never
20500 happen, since if a stack frame is not needed the
20501 hard frame pointer will never be used. */
20502 gcc_unreachable ();
20506 /* Given FROM and TO register numbers, say whether this elimination is
20507 allowed. Frame pointer elimination is automatically handled.
20509 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20510 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20511 pointer, we must eliminate FRAME_POINTER_REGNUM into
20512 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20513 ARG_POINTER_REGNUM. */
20515 bool
20516 arm_can_eliminate (const int from, const int to)
20518 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20519 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20520 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20521 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20522 true);
20525 /* Emit RTL to save coprocessor registers on function entry. Returns the
20526 number of bytes pushed. */
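/* Illustrative example (register choice arbitrary): if only d8-d10 are live
   and call-saved, the VFP loop below groups them into a single
   vfp_emit_fstmd call (roughly "vpush {d8-d10}"); separate live runs that
   are split by dead register pairs produce separate store-multiple
   instructions.  */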
20528 static int
20529 arm_save_coproc_regs(void)
20531 int saved_size = 0;
20532 unsigned reg;
20533 unsigned start_reg;
20534 rtx insn;
20536 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20537 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20539 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20540 insn = gen_rtx_MEM (V2SImode, insn);
20541 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20542 RTX_FRAME_RELATED_P (insn) = 1;
20543 saved_size += 8;
20546 if (TARGET_HARD_FLOAT && TARGET_VFP)
20548 start_reg = FIRST_VFP_REGNUM;
20550 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20552 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20553 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20555 if (start_reg != reg)
20556 saved_size += vfp_emit_fstmd (start_reg,
20557 (reg - start_reg) / 2);
20558 start_reg = reg + 2;
20561 if (start_reg != reg)
20562 saved_size += vfp_emit_fstmd (start_reg,
20563 (reg - start_reg) / 2);
20565 return saved_size;
20569 /* Set the Thumb frame pointer from the stack pointer. */
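/* Illustrative example (values invented for the sketch): for an offset of 16
   this is a single "add r7, sp, #16"; for an offset of 4096 it becomes a move
   of the constant into the frame pointer followed by an add of SP, using the
   operand order each Thumb variant expects (see below).  */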
20571 static void
20572 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20574 HOST_WIDE_INT amount;
20575 rtx insn, dwarf;
20577 amount = offsets->outgoing_args - offsets->locals_base;
20578 if (amount < 1024)
20579 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20580 stack_pointer_rtx, GEN_INT (amount)));
20581 else
20583 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20584 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20585 expects the first two operands to be the same. */
20586 if (TARGET_THUMB2)
20588 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20589 stack_pointer_rtx,
20590 hard_frame_pointer_rtx));
20592 else
20594 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20595 hard_frame_pointer_rtx,
20596 stack_pointer_rtx));
20598 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20599 plus_constant (Pmode, stack_pointer_rtx, amount));
20600 RTX_FRAME_RELATED_P (dwarf) = 1;
20601 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20604 RTX_FRAME_RELATED_P (insn) = 1;
20607 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20608 function. */
20609 void
20610 arm_expand_prologue (void)
20612 rtx amount;
20613 rtx insn;
20614 rtx ip_rtx;
20615 unsigned long live_regs_mask;
20616 unsigned long func_type;
20617 int fp_offset = 0;
20618 int saved_pretend_args = 0;
20619 int saved_regs = 0;
20620 unsigned HOST_WIDE_INT args_to_push;
20621 arm_stack_offsets *offsets;
20623 func_type = arm_current_func_type ();
20625 /* Naked functions don't have prologues. */
20626 if (IS_NAKED (func_type))
20627 return;
20629 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20630 args_to_push = crtl->args.pretend_args_size;
20632 /* Compute which registers we will have to save onto the stack. */
20633 offsets = arm_get_frame_offsets ();
20634 live_regs_mask = offsets->saved_regs_mask;
20636 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20638 if (IS_STACKALIGN (func_type))
20640 rtx r0, r1;
20642 /* Handle a word-aligned stack pointer. We generate the following:
20644 mov r0, sp
20645 bic r1, r0, #7
20646 mov sp, r1
20647 <save and restore r0 in normal prologue/epilogue>
20648 mov sp, r0
20649 bx lr
20651 The unwinder doesn't need to know about the stack realignment.
20652 Just tell it we saved SP in r0. */
20653 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20655 r0 = gen_rtx_REG (SImode, 0);
20656 r1 = gen_rtx_REG (SImode, 1);
20658 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20659 RTX_FRAME_RELATED_P (insn) = 1;
20660 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20662 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20664 /* ??? The CFA changes here, which may cause GDB to conclude that it
20665 has entered a different function. That said, the unwind info is
20666 correct, individually, before and after this instruction because
20667 we've described the save of SP, which will override the default
20668 handling of SP as restoring from the CFA. */
20669 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20672 /* For APCS frames, if IP register is clobbered
20673 when creating frame, save that register in a special
20674 way. */
20675 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20677 if (IS_INTERRUPT (func_type))
20679 /* Interrupt functions must not corrupt any registers.
20680 Creating a frame pointer however, corrupts the IP
20681 register, so we must push it first. */
20682 emit_multi_reg_push (1 << IP_REGNUM);
20684 /* Do not set RTX_FRAME_RELATED_P on this insn.
20685 The dwarf stack unwinding code only wants to see one
20686 stack decrement per function, and this is not it. If
20687 this instruction is labeled as being part of the frame
20688 creation sequence then dwarf2out_frame_debug_expr will
20689 die when it encounters the assignment of IP to FP
20690 later on, since the use of SP here establishes SP as
20691 the CFA register and not IP.
20693 Anyway this instruction is not really part of the stack
20694 frame creation although it is part of the prologue. */
20696 else if (IS_NESTED (func_type))
20698 /* The static chain register is the same as the IP register
20699 used as a scratch register during stack frame creation.
20700 To get around this we need to find somewhere to store IP
20701 whilst the frame is being created. We try the following
20702 places in order:
20704 1. The last argument register r3.
20705 2. A slot on the stack above the frame. (This only
20706 works if the function is not a varargs function).
20707 3. Register r3 again, after pushing the argument registers
20708 onto the stack.
20710 Note - we only need to tell the dwarf2 backend about the SP
20711 adjustment in the second variant; the static chain register
20712 doesn't need to be unwound, as it doesn't contain a value
20713 inherited from the caller. */
20715 if (!arm_r3_live_at_start_p ())
20716 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20717 else if (args_to_push == 0)
20719 rtx dwarf;
20721 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20722 saved_regs += 4;
20724 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
20725 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
20726 fp_offset = 4;
20728 /* Just tell the dwarf backend that we adjusted SP. */
20729 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20730 plus_constant (Pmode, stack_pointer_rtx,
20731 -fp_offset));
20732 RTX_FRAME_RELATED_P (insn) = 1;
20733 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20735 else
20737 /* Store the args on the stack. */
20738 if (cfun->machine->uses_anonymous_args)
20739 insn = emit_multi_reg_push
20740 ((0xf0 >> (args_to_push / 4)) & 0xf);
20741 else
20742 insn = emit_insn
20743 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20744 GEN_INT (- args_to_push)));
20746 RTX_FRAME_RELATED_P (insn) = 1;
20748 saved_pretend_args = 1;
20749 fp_offset = args_to_push;
20750 args_to_push = 0;
20752 /* Now reuse r3 to preserve IP. */
20753 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20757 insn = emit_set_insn (ip_rtx,
20758 plus_constant (Pmode, stack_pointer_rtx,
20759 fp_offset));
20760 RTX_FRAME_RELATED_P (insn) = 1;
20763 if (args_to_push)
20765 /* Push the argument registers, or reserve space for them. */
20766 if (cfun->machine->uses_anonymous_args)
20767 insn = emit_multi_reg_push
20768 ((0xf0 >> (args_to_push / 4)) & 0xf);
20769 else
20770 insn = emit_insn
20771 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20772 GEN_INT (- args_to_push)));
20773 RTX_FRAME_RELATED_P (insn) = 1;
20776 /* If this is an interrupt service routine, and the link register
20777 is going to be pushed, and we're not generating an extra
20778 push of IP (needed when a frame is required and the frame layout is APCS),
20779 subtracting four from LR now will mean that the function return
20780 can be done with a single instruction. */
20781 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
20782 && (live_regs_mask & (1 << LR_REGNUM)) != 0
20783 && !(frame_pointer_needed && TARGET_APCS_FRAME)
20784 && TARGET_ARM)
20786 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
20788 emit_set_insn (lr, plus_constant (SImode, lr, -4));
20791 if (live_regs_mask)
20793 saved_regs += bit_count (live_regs_mask) * 4;
20794 if (optimize_size && !frame_pointer_needed
20795 && saved_regs == offsets->saved_regs - offsets->saved_args)
20797 /* If no coprocessor registers are being pushed and we don't have
20798 to worry about a frame pointer then push extra registers to
20799 create the stack frame. This is done in a way that does not
20800 alter the frame layout, so is independent of the epilogue. */
20801 int n;
20802 int frame;
20803 n = 0;
20804 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
20805 n++;
20806 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
20807 if (frame && n * 4 >= frame)
20809 n = frame / 4;
20810 live_regs_mask |= (1 << n) - 1;
20811 saved_regs += frame;
20815 if (TARGET_LDRD
20816 && current_tune->prefer_ldrd_strd
20817 && !optimize_function_for_size_p (cfun))
20819 if (TARGET_THUMB2)
20821 thumb2_emit_strd_push (live_regs_mask);
20823 else if (TARGET_ARM
20824 && !TARGET_APCS_FRAME
20825 && !IS_INTERRUPT (func_type))
20827 arm_emit_strd_push (live_regs_mask);
20829 else
20831 insn = emit_multi_reg_push (live_regs_mask);
20832 RTX_FRAME_RELATED_P (insn) = 1;
20835 else
20837 insn = emit_multi_reg_push (live_regs_mask);
20838 RTX_FRAME_RELATED_P (insn) = 1;
20842 if (! IS_VOLATILE (func_type))
20843 saved_regs += arm_save_coproc_regs ();
20845 if (frame_pointer_needed && TARGET_ARM)
20847 /* Create the new frame pointer. */
20848 if (TARGET_APCS_FRAME)
20850 insn = GEN_INT (-(4 + args_to_push + fp_offset));
20851 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
20852 RTX_FRAME_RELATED_P (insn) = 1;
20854 if (IS_NESTED (func_type))
20856 /* Recover the static chain register. */
20857 if (!arm_r3_live_at_start_p () || saved_pretend_args)
20858 insn = gen_rtx_REG (SImode, 3);
20859 else /* if (crtl->args.pretend_args_size == 0) */
20861 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
20862 insn = gen_frame_mem (SImode, insn);
20864 emit_set_insn (ip_rtx, insn);
20865 /* Add a USE to stop propagate_one_insn() from barfing. */
20866 emit_insn (gen_force_register_use (ip_rtx));
20869 else
20871 insn = GEN_INT (saved_regs - 4);
20872 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20873 stack_pointer_rtx, insn));
20874 RTX_FRAME_RELATED_P (insn) = 1;
20878 if (flag_stack_usage_info)
20879 current_function_static_stack_size
20880 = offsets->outgoing_args - offsets->saved_args;
20882 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
20884 /* This add can produce multiple insns for a large constant, so we
20885 need to get tricky. */
20886 rtx last = get_last_insn ();
20888 amount = GEN_INT (offsets->saved_args + saved_regs
20889 - offsets->outgoing_args);
20891 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20892 amount));
20895 last = last ? NEXT_INSN (last) : get_insns ();
20896 RTX_FRAME_RELATED_P (last) = 1;
20898 while (last != insn);
20900 /* If the frame pointer is needed, emit a special barrier that
20901 will prevent the scheduler from moving stores to the frame
20902 before the stack adjustment. */
20903 if (frame_pointer_needed)
20904 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
20905 hard_frame_pointer_rtx));
20909 if (frame_pointer_needed && TARGET_THUMB2)
20910 thumb_set_frame_pointer (offsets);
20912 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20914 unsigned long mask;
20916 mask = live_regs_mask;
20917 mask &= THUMB2_WORK_REGS;
20918 if (!IS_NESTED (func_type))
20919 mask |= (1 << IP_REGNUM);
20920 arm_load_pic_register (mask);
20923 /* If we are profiling, make sure no instructions are scheduled before
20924 the call to mcount. Similarly if the user has requested no
20925 scheduling in the prolog. Similarly if we want non-call exceptions
20926 using the EABI unwinder, to prevent faulting instructions from being
20927 swapped with a stack adjustment. */
20928 if (crtl->profile || !TARGET_SCHED_PROLOG
20929 || (arm_except_unwind_info (&global_options) == UI_TARGET
20930 && cfun->can_throw_non_call_exceptions))
20931 emit_insn (gen_blockage ());
20933 /* If the link register is being kept alive, with the return address in it,
20934 then make sure that it does not get reused by the ce2 pass. */
20935 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
20936 cfun->machine->lr_save_eliminated = 1;
20939 /* Print condition code to STREAM. Helper function for arm_print_operand. */
20940 static void
20941 arm_print_condition (FILE *stream)
20943 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
20945 /* Branch conversion is not implemented for Thumb-2. */
20946 if (TARGET_THUMB)
20948 output_operand_lossage ("predicated Thumb instruction");
20949 return;
20951 if (current_insn_predicate != NULL)
20953 output_operand_lossage
20954 ("predicated instruction in conditional sequence");
20955 return;
20958 fputs (arm_condition_codes[arm_current_cc], stream);
20960 else if (current_insn_predicate)
20962 enum arm_cond_code code;
20964 if (TARGET_THUMB1)
20966 output_operand_lossage ("predicated Thumb instruction");
20967 return;
20970 code = get_arm_condition_code (current_insn_predicate);
20971 fputs (arm_condition_codes[code], stream);
20976 /* If CODE is 'd', then X is a condition operand and the instruction
20977 should only be executed if the condition is true.
20978 If CODE is 'D', then X is a condition operand and the instruction
20979 should only be executed if the condition is false: however, if the mode
20980 of the comparison is CCFPEmode, then always execute the instruction -- we
20981 do this because in these circumstances !GE does not necessarily imply LT;
20982 in these cases the instruction pattern will take care to make sure that
20983 an instruction containing %d will follow, thereby undoing the effects of
20984 doing this instruction unconditionally.
20985 If CODE is 'N' then X is a floating point operand that must be negated
20986 before output.
20987 If CODE is 'B' then output a bitwise inverted value of X (a const int).
20988 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
20989 static void
20990 arm_print_operand (FILE *stream, rtx x, int code)
20992 switch (code)
20994 case '@':
20995 fputs (ASM_COMMENT_START, stream);
20996 return;
20998 case '_':
20999 fputs (user_label_prefix, stream);
21000 return;
21002 case '|':
21003 fputs (REGISTER_PREFIX, stream);
21004 return;
21006 case '?':
21007 arm_print_condition (stream);
21008 return;
21010 case '(':
21011 /* Nothing in unified syntax, otherwise the current condition code. */
21012 if (!TARGET_UNIFIED_ASM)
21013 arm_print_condition (stream);
21014 break;
21016 case ')':
21017 /* The current condition code in unified syntax, otherwise nothing. */
21018 if (TARGET_UNIFIED_ASM)
21019 arm_print_condition (stream);
21020 break;
21022 case '.':
21023 /* The current condition code for a condition code setting instruction.
21024 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21025 if (TARGET_UNIFIED_ASM)
21027 fputc('s', stream);
21028 arm_print_condition (stream);
21030 else
21032 arm_print_condition (stream);
21033 fputc('s', stream);
21035 return;
21037 case '!':
21038 /* If the instruction is conditionally executed then print
21039 the current condition code, otherwise print 's'. */
21040 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21041 if (current_insn_predicate)
21042 arm_print_condition (stream);
21043 else
21044 fputc('s', stream);
21045 break;
21047 /* %# is a "break" sequence. It doesn't output anything, but is used to
21048 separate e.g. operand numbers from following text, if that text consists
21049 of further digits which we don't want to be part of the operand
21050 number. */
21051 case '#':
21052 return;
21054 case 'N':
21056 REAL_VALUE_TYPE r;
21057 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21058 r = real_value_negate (&r);
21059 fprintf (stream, "%s", fp_const_from_val (&r));
21061 return;
21063 /* An integer or symbol address without a preceding # sign. */
21064 case 'c':
21065 switch (GET_CODE (x))
21067 case CONST_INT:
21068 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21069 break;
21071 case SYMBOL_REF:
21072 output_addr_const (stream, x);
21073 break;
21075 case CONST:
21076 if (GET_CODE (XEXP (x, 0)) == PLUS
21077 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21079 output_addr_const (stream, x);
21080 break;
21082 /* Fall through. */
21084 default:
21085 output_operand_lossage ("Unsupported operand for code '%c'", code);
21087 return;
21089 /* An integer that we want to print in HEX. */
21090 case 'x':
21091 switch (GET_CODE (x))
21093 case CONST_INT:
21094 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21095 break;
21097 default:
21098 output_operand_lossage ("Unsupported operand for code '%c'", code);
21100 return;
21102 case 'B':
21103 if (CONST_INT_P (x))
21105 HOST_WIDE_INT val;
21106 val = ARM_SIGN_EXTEND (~INTVAL (x));
21107 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21109 else
21111 putc ('~', stream);
21112 output_addr_const (stream, x);
21114 return;
21116 case 'L':
21117 /* The low 16 bits of an immediate constant. */
21118 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21119 return;
21121 case 'i':
21122 fprintf (stream, "%s", arithmetic_instr (x, 1));
21123 return;
21125 case 'I':
21126 fprintf (stream, "%s", arithmetic_instr (x, 0));
21127 return;
21129 case 'S':
21131 HOST_WIDE_INT val;
21132 const char *shift;
21134 shift = shift_op (x, &val);
21136 if (shift)
21138 fprintf (stream, ", %s ", shift);
21139 if (val == -1)
21140 arm_print_operand (stream, XEXP (x, 1), 0);
21141 else
21142 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21145 return;
21147 /* An explanation of the 'Q', 'R' and 'H' register operands:
21149 In a pair of registers containing a DI or DF value the 'Q'
21150 operand returns the register number of the register containing
21151 the least significant part of the value. The 'R' operand returns
21152 the register number of the register containing the most
21153 significant part of the value.
21155 The 'H' operand returns the higher of the two register numbers.
21156 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21157 same as the 'Q' operand, since the most significant part of the
21158 value is held in the lower number register. The reverse is true
21159 on systems where WORDS_BIG_ENDIAN is false.
21161 The purpose of these operands is to distinguish between cases
21162 where the endian-ness of the values is important (for example
21163 when they are added together), and cases where the endian-ness
21164 is irrelevant, but the order of register operations is important.
21165 For example when loading a value from memory into a register
21166 pair, the endian-ness does not matter. Provided that the value
21167 from the lower memory address is put into the lower numbered
21168 register, and the value from the higher address is put into the
21169 higher numbered register, the load will work regardless of whether
21170 the value being loaded is big-wordian or little-wordian. The
21171 order of the two register loads can matter however, if the address
21172 of the memory location is actually held in one of the registers
21173 being overwritten by the load.
21175 The 'Q' and 'R' constraints are also available for 64-bit
21176 constants. */
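/* Illustrative example (registers chosen arbitrarily): for a DImode value
   held in {r2, r3} on a little-endian target, %Q prints r2 (least
   significant word), %R prints r3 (most significant word) and %H prints r3
   (always the higher register number).  */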
21177 case 'Q':
21178 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21180 rtx part = gen_lowpart (SImode, x);
21181 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21182 return;
21185 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21187 output_operand_lossage ("invalid operand for code '%c'", code);
21188 return;
21191 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21192 return;
21194 case 'R':
21195 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21197 enum machine_mode mode = GET_MODE (x);
21198 rtx part;
21200 if (mode == VOIDmode)
21201 mode = DImode;
21202 part = gen_highpart_mode (SImode, mode, x);
21203 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21204 return;
21207 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21209 output_operand_lossage ("invalid operand for code '%c'", code);
21210 return;
21213 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21214 return;
21216 case 'H':
21217 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21219 output_operand_lossage ("invalid operand for code '%c'", code);
21220 return;
21223 asm_fprintf (stream, "%r", REGNO (x) + 1);
21224 return;
21226 case 'J':
21227 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21229 output_operand_lossage ("invalid operand for code '%c'", code);
21230 return;
21233 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21234 return;
21236 case 'K':
21237 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21239 output_operand_lossage ("invalid operand for code '%c'", code);
21240 return;
21243 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21244 return;
21246 case 'm':
21247 asm_fprintf (stream, "%r",
21248 REG_P (XEXP (x, 0))
21249 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21250 return;
21252 case 'M':
21253 asm_fprintf (stream, "{%r-%r}",
21254 REGNO (x),
21255 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21256 return;
21258 /* Like 'M', but writing doubleword vector registers, for use by Neon
21259 insns. */
21260 case 'h':
21262 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21263 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21264 if (numregs == 1)
21265 asm_fprintf (stream, "{d%d}", regno);
21266 else
21267 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21269 return;
21271 case 'd':
21272 /* CONST_TRUE_RTX means always -- that's the default. */
21273 if (x == const_true_rtx)
21274 return;
21276 if (!COMPARISON_P (x))
21278 output_operand_lossage ("invalid operand for code '%c'", code);
21279 return;
21282 fputs (arm_condition_codes[get_arm_condition_code (x)],
21283 stream);
21284 return;
21286 case 'D':
21287 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21288 want to do that. */
21289 if (x == const_true_rtx)
21291 output_operand_lossage ("instruction never executed");
21292 return;
21294 if (!COMPARISON_P (x))
21296 output_operand_lossage ("invalid operand for code '%c'", code);
21297 return;
21300 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21301 (get_arm_condition_code (x))],
21302 stream);
21303 return;
21305 case 's':
21306 case 'V':
21307 case 'W':
21308 case 'X':
21309 case 'Y':
21310 case 'Z':
21311 /* Former Maverick support, removed after GCC-4.7. */
21312 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21313 return;
21315 case 'U':
21316 if (!REG_P (x)
21317 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21318 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21319 /* Bad value for wCG register number. */
21321 output_operand_lossage ("invalid operand for code '%c'", code);
21322 return;
21325 else
21326 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21327 return;
21329 /* Print an iWMMXt control register name. */
21330 case 'w':
21331 if (!CONST_INT_P (x)
21332 || INTVAL (x) < 0
21333 || INTVAL (x) >= 16)
21334 /* Bad value for wC register number. */
21336 output_operand_lossage ("invalid operand for code '%c'", code);
21337 return;
21340 else
21342 static const char * wc_reg_names [16] =
21344 "wCID", "wCon", "wCSSF", "wCASF",
21345 "wC4", "wC5", "wC6", "wC7",
21346 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21347 "wC12", "wC13", "wC14", "wC15"
21350 fputs (wc_reg_names [INTVAL (x)], stream);
21352 return;
21354 /* Print the high single-precision register of a VFP double-precision
21355 register. */
21356 case 'p':
21358 int mode = GET_MODE (x);
21359 int regno;
21361 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21363 output_operand_lossage ("invalid operand for code '%c'", code);
21364 return;
21367 regno = REGNO (x);
21368 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21370 output_operand_lossage ("invalid operand for code '%c'", code);
21371 return;
21374 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21376 return;
21378 /* Print a VFP/Neon double precision or quad precision register name. */
21379 case 'P':
21380 case 'q':
21382 int mode = GET_MODE (x);
21383 int is_quad = (code == 'q');
21384 int regno;
21386 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21388 output_operand_lossage ("invalid operand for code '%c'", code);
21389 return;
21392 if (!REG_P (x)
21393 || !IS_VFP_REGNUM (REGNO (x)))
21395 output_operand_lossage ("invalid operand for code '%c'", code);
21396 return;
21399 regno = REGNO (x);
21400 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21401 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21403 output_operand_lossage ("invalid operand for code '%c'", code);
21404 return;
21407 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21408 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21410 return;
21412 /* These two codes print the low/high doubleword register of a Neon quad
21413 register, respectively. For pair-structure types, can also print
21414 low/high quadword registers. */
21415 case 'e':
21416 case 'f':
21418 int mode = GET_MODE (x);
21419 int regno;
21421 if ((GET_MODE_SIZE (mode) != 16
21422 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21424 output_operand_lossage ("invalid operand for code '%c'", code);
21425 return;
21428 regno = REGNO (x);
21429 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21431 output_operand_lossage ("invalid operand for code '%c'", code);
21432 return;
21435 if (GET_MODE_SIZE (mode) == 16)
21436 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21437 + (code == 'f' ? 1 : 0));
21438 else
21439 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21440 + (code == 'f' ? 1 : 0));
21442 return;
21444 /* Print a VFPv3 floating-point constant, represented as an integer
21445 index. */
21446 case 'G':
21448 int index = vfp3_const_double_index (x);
21449 gcc_assert (index != -1);
21450 fprintf (stream, "%d", index);
21452 return;
21454 /* Print bits representing opcode features for Neon.
21456 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21457 and polynomials as unsigned.
21459 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21461 Bit 2 is 1 for rounding functions, 0 otherwise. */
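/* Illustrative example: an operand with the value 5 (binary 101: signed,
   integer, rounding) makes %T print 's', %F print 'i', %t print 's' and
   %O print "r".  */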
21463 /* Identify the type as 's', 'u', 'p' or 'f'. */
21464 case 'T':
21466 HOST_WIDE_INT bits = INTVAL (x);
21467 fputc ("uspf"[bits & 3], stream);
21469 return;
21471 /* Likewise, but signed and unsigned integers are both 'i'. */
21472 case 'F':
21474 HOST_WIDE_INT bits = INTVAL (x);
21475 fputc ("iipf"[bits & 3], stream);
21477 return;
21479 /* As for 'T', but emit 'u' instead of 'p'. */
21480 case 't':
21482 HOST_WIDE_INT bits = INTVAL (x);
21483 fputc ("usuf"[bits & 3], stream);
21485 return;
21487 /* Bit 2: rounding (vs none). */
21488 case 'O':
21490 HOST_WIDE_INT bits = INTVAL (x);
21491 fputs ((bits & 4) != 0 ? "r" : "", stream);
21493 return;
21495 /* Memory operand for vld1/vst1 instruction. */
21496 case 'A':
21498 rtx addr;
21499 bool postinc = FALSE;
21500 unsigned align, memsize, align_bits;
21502 gcc_assert (MEM_P (x));
21503 addr = XEXP (x, 0);
21504 if (GET_CODE (addr) == POST_INC)
21506 postinc = 1;
21507 addr = XEXP (addr, 0);
21509 asm_fprintf (stream, "[%r", REGNO (addr));
21511 /* We know the alignment of this access, so we can emit a hint in the
21512 instruction (for some alignments) as an aid to the memory subsystem
21513 of the target. */
21514 align = MEM_ALIGN (x) >> 3;
21515 memsize = MEM_SIZE (x);
21517 /* Only certain alignment specifiers are supported by the hardware. */
21518 if (memsize == 32 && (align % 32) == 0)
21519 align_bits = 256;
21520 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21521 align_bits = 128;
21522 else if (memsize >= 8 && (align % 8) == 0)
21523 align_bits = 64;
21524 else
21525 align_bits = 0;
21527 if (align_bits != 0)
21528 asm_fprintf (stream, ":%d", align_bits);
21530 asm_fprintf (stream, "]");
21532 if (postinc)
21533 fputs("!", stream);
21535 return;
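/* Example: a 16-byte vld1/vst1 access through r0 that is known to be
   128-bit aligned prints as "[r0:128]"; with a post-increment address it
   becomes "[r0:128]!".  */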
21537 case 'C':
21539 rtx addr;
21541 gcc_assert (MEM_P (x));
21542 addr = XEXP (x, 0);
21543 gcc_assert (REG_P (addr));
21544 asm_fprintf (stream, "[%r]", REGNO (addr));
21546 return;
21548 /* Translate an S register number into a D register number and element index. */
21549 case 'y':
21551 int mode = GET_MODE (x);
21552 int regno;
21554 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21556 output_operand_lossage ("invalid operand for code '%c'", code);
21557 return;
21560 regno = REGNO (x);
21561 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21563 output_operand_lossage ("invalid operand for code '%c'", code);
21564 return;
21567 regno = regno - FIRST_VFP_REGNUM;
21568 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21570 return;
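/* Example: an SFmode value in s5 is the odd half of d2, so the 'y' code
   prints "d2[1]".  */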
21572 case 'v':
21573 gcc_assert (CONST_DOUBLE_P (x));
21574 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
21575 return;
21577 /* Register specifier for vld1.16/vst1.16. Translate the S register
21578 number into a D register number and element index. */
21579 case 'z':
21581 int mode = GET_MODE (x);
21582 int regno;
21584 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21586 output_operand_lossage ("invalid operand for code '%c'", code);
21587 return;
21590 regno = REGNO (x);
21591 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21593 output_operand_lossage ("invalid operand for code '%c'", code);
21594 return;
21597 regno = regno - FIRST_VFP_REGNUM;
21598 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21600 return;
21602 default:
21603 if (x == 0)
21605 output_operand_lossage ("missing operand");
21606 return;
21609 switch (GET_CODE (x))
21611 case REG:
21612 asm_fprintf (stream, "%r", REGNO (x));
21613 break;
21615 case MEM:
21616 output_memory_reference_mode = GET_MODE (x);
21617 output_address (XEXP (x, 0));
21618 break;
21620 case CONST_DOUBLE:
21621 if (TARGET_NEON)
21623 char fpstr[20];
21624 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21625 sizeof (fpstr), 0, 1);
21626 fprintf (stream, "#%s", fpstr);
21628 else
21629 fprintf (stream, "#%s", fp_immediate_constant (x));
21630 break;
21632 default:
21633 gcc_assert (GET_CODE (x) != NEG);
21634 fputc ('#', stream);
21635 if (GET_CODE (x) == HIGH)
21637 fputs (":lower16:", stream);
21638 x = XEXP (x, 0);
21641 output_addr_const (stream, x);
21642 break;
21647 /* Target hook for printing a memory address. */
21648 static void
21649 arm_print_operand_address (FILE *stream, rtx x)
21651 if (TARGET_32BIT)
21653 int is_minus = GET_CODE (x) == MINUS;
21655 if (REG_P (x))
21656 asm_fprintf (stream, "[%r]", REGNO (x));
21657 else if (GET_CODE (x) == PLUS || is_minus)
21659 rtx base = XEXP (x, 0);
21660 rtx index = XEXP (x, 1);
21661 HOST_WIDE_INT offset = 0;
21662 if (!REG_P (base)
21663 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21665 /* Ensure that BASE is a register. */
21666 /* (one of them must be). */
21667 /* Also ensure that the SP is not used as an index register. */
21668 rtx temp = base;
21669 base = index;
21670 index = temp;
21672 switch (GET_CODE (index))
21674 case CONST_INT:
21675 offset = INTVAL (index);
21676 if (is_minus)
21677 offset = -offset;
21678 asm_fprintf (stream, "[%r, #%wd]",
21679 REGNO (base), offset);
21680 break;
21682 case REG:
21683 asm_fprintf (stream, "[%r, %s%r]",
21684 REGNO (base), is_minus ? "-" : "",
21685 REGNO (index));
21686 break;
21688 case MULT:
21689 case ASHIFTRT:
21690 case LSHIFTRT:
21691 case ASHIFT:
21692 case ROTATERT:
21694 asm_fprintf (stream, "[%r, %s%r",
21695 REGNO (base), is_minus ? "-" : "",
21696 REGNO (XEXP (index, 0)));
21697 arm_print_operand (stream, index, 'S');
21698 fputs ("]", stream);
21699 break;
21702 default:
21703 gcc_unreachable ();
21706 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
21707 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
21709 extern enum machine_mode output_memory_reference_mode;
21711 gcc_assert (REG_P (XEXP (x, 0)));
21713 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
21714 asm_fprintf (stream, "[%r, #%s%d]!",
21715 REGNO (XEXP (x, 0)),
21716 GET_CODE (x) == PRE_DEC ? "-" : "",
21717 GET_MODE_SIZE (output_memory_reference_mode));
21718 else
21719 asm_fprintf (stream, "[%r], #%s%d",
21720 REGNO (XEXP (x, 0)),
21721 GET_CODE (x) == POST_DEC ? "-" : "",
21722 GET_MODE_SIZE (output_memory_reference_mode));
21724 else if (GET_CODE (x) == PRE_MODIFY)
21726 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
21727 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21728 asm_fprintf (stream, "#%wd]!",
21729 INTVAL (XEXP (XEXP (x, 1), 1)));
21730 else
21731 asm_fprintf (stream, "%r]!",
21732 REGNO (XEXP (XEXP (x, 1), 1)));
21734 else if (GET_CODE (x) == POST_MODIFY)
21736 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
21737 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21738 asm_fprintf (stream, "#%wd",
21739 INTVAL (XEXP (XEXP (x, 1), 1)));
21740 else
21741 asm_fprintf (stream, "%r",
21742 REGNO (XEXP (XEXP (x, 1), 1)));
21744 else output_addr_const (stream, x);
21746 else
21748 if (REG_P (x))
21749 asm_fprintf (stream, "[%r]", REGNO (x));
21750 else if (GET_CODE (x) == POST_INC)
21751 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
21752 else if (GET_CODE (x) == PLUS)
21754 gcc_assert (REG_P (XEXP (x, 0)));
21755 if (CONST_INT_P (XEXP (x, 1)))
21756 asm_fprintf (stream, "[%r, #%wd]",
21757 REGNO (XEXP (x, 0)),
21758 INTVAL (XEXP (x, 1)));
21759 else
21760 asm_fprintf (stream, "[%r, %r]",
21761 REGNO (XEXP (x, 0)),
21762 REGNO (XEXP (x, 1)));
21764 else
21765 output_addr_const (stream, x);
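/* Some examples for the 32-bit case above: (reg r4) prints as "[r4]",
   (plus (reg r4) (const_int 8)) prints as "[r4, #8]", and a POST_INC of
   r0 in an SImode reference prints as "[r0], #4", the increment being
   derived from output_memory_reference_mode.  */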
21769 /* Target hook for indicating whether a punctuation character for
21770 TARGET_PRINT_OPERAND is valid. */
21771 static bool
21772 arm_print_operand_punct_valid_p (unsigned char code)
21774 return (code == '@' || code == '|' || code == '.'
21775 || code == '(' || code == ')' || code == '#'
21776 || (TARGET_32BIT && (code == '?'))
21777 || (TARGET_THUMB2 && (code == '!'))
21778 || (TARGET_THUMB && (code == '_')));
21781 /* Target hook for assembling integer objects. The ARM version needs to
21782 handle word-sized values specially. */
21783 static bool
21784 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
21786 enum machine_mode mode;
21788 if (size == UNITS_PER_WORD && aligned_p)
21790 fputs ("\t.word\t", asm_out_file);
21791 output_addr_const (asm_out_file, x);
21793 /* Mark symbols as position independent. We only do this in the
21794 .text segment, not in the .data segment. */
21795 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
21796 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
21798 /* See legitimize_pic_address for an explanation of the
21799 TARGET_VXWORKS_RTP check. */
21800 if (!arm_pic_data_is_text_relative
21801 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
21802 fputs ("(GOT)", asm_out_file);
21803 else
21804 fputs ("(GOTOFF)", asm_out_file);
21806 fputc ('\n', asm_out_file);
21807 return true;
21810 mode = GET_MODE (x);
21812 if (arm_vector_mode_supported_p (mode))
21814 int i, units;
21816 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21818 units = CONST_VECTOR_NUNITS (x);
21819 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
21821 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21822 for (i = 0; i < units; i++)
21824 rtx elt = CONST_VECTOR_ELT (x, i);
21825 assemble_integer
21826 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
21828 else
21829 for (i = 0; i < units; i++)
21831 rtx elt = CONST_VECTOR_ELT (x, i);
21832 REAL_VALUE_TYPE rval;
21834 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
21836 assemble_real
21837 (rval, GET_MODE_INNER (mode),
21838 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
21841 return true;
21844 return default_assemble_integer (x, size, aligned_p);
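/* Example of the word-sized case above: in a constant table built with
   -fPIC, and with NEED_GOT_RELOC in effect, a reference to a non-local
   symbol is emitted as "\t.word\tfoo(GOT)" while a local symbol or label
   gets "\t.word\t.L5(GOTOFF)" (assuming the default text-relative data
   model); "foo" and ".L5" are placeholder names.  */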
21847 static void
21848 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
21850 section *s;
21852 if (!TARGET_AAPCS_BASED)
21854 (is_ctor ?
21855 default_named_section_asm_out_constructor
21856 : default_named_section_asm_out_destructor) (symbol, priority);
21857 return;
21860 /* Put these in the .init_array section, using a special relocation. */
21861 if (priority != DEFAULT_INIT_PRIORITY)
21863 char buf[18];
21864 sprintf (buf, "%s.%.5u",
21865 is_ctor ? ".init_array" : ".fini_array",
21866 priority);
21867 s = get_section (buf, SECTION_WRITE, NULL_TREE);
21869 else if (is_ctor)
21870 s = ctors_section;
21871 else
21872 s = dtors_section;
21874 switch_to_section (s);
21875 assemble_align (POINTER_SIZE);
21876 fputs ("\t.word\t", asm_out_file);
21877 output_addr_const (asm_out_file, symbol);
21878 fputs ("(target1)\n", asm_out_file);
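/* Example: on an AAPCS-based target, a constructor registered with
   priority 123 (e.g. via __attribute__((constructor(123)))) goes into the
   section ".init_array.00123" and is emitted as "\t.word\t<symbol>(target1)",
   the (target1) operator selecting the R_ARM_TARGET1 relocation.  */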
21881 /* Add a function to the list of static constructors. */
21883 static void
21884 arm_elf_asm_constructor (rtx symbol, int priority)
21886 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
21889 /* Add a function to the list of static destructors. */
21891 static void
21892 arm_elf_asm_destructor (rtx symbol, int priority)
21894 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
21897 /* A finite state machine takes care of noticing whether or not instructions
21898 can be conditionally executed, and thus decreases execution time and code
21899 size by deleting branch instructions. The fsm is controlled by
21900 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21902 /* The states of the fsm controlling condition codes are:
21903 0: normal, do nothing special
21904 1: make ASM_OUTPUT_OPCODE not output this instruction
21905 2: make ASM_OUTPUT_OPCODE not output this instruction
21906 3: make instructions conditional
21907 4: make instructions conditional
21909 State transitions (state->state by whom under condition):
21910 0 -> 1 final_prescan_insn if the `target' is a label
21911 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21912 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21913 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21914 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21915 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21916 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21917 (the target insn is arm_target_insn).
21919 If the jump clobbers the conditions then we use states 2 and 4.
21921 A similar thing can be done with conditional return insns.
21923 XXX In case the `target' is an unconditional branch, this conditionalising
21924 of the instructions always reduces code size, but not always execution
21925 time. But then, I want to reduce the code size to somewhere near what
21926 /bin/cc produces. */
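/* As an illustration of what the fsm achieves in ARM state, a sequence
   such as

	cmp	r3, #0
	beq	.L1
	add	r0, r0, #1
   .L1:

   can be output as

	cmp	r3, #0
	addne	r0, r0, #1

   i.e. the branch is suppressed and the skipped instruction is given the
   inverse condition.  */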
21928 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21929 instructions. When a COND_EXEC instruction is seen the subsequent
21930 instructions are scanned so that multiple conditional instructions can be
21931 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21932 specify the length and true/false mask for the IT block. These will be
21933 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
21935 /* Returns the index of the ARM condition code string in
21936 `arm_condition_codes', or ARM_NV if the comparison is invalid.
21937 COMPARISON should be an rtx like `(eq (...) (...))'. */
21939 enum arm_cond_code
21940 maybe_get_arm_condition_code (rtx comparison)
21942 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
21943 enum arm_cond_code code;
21944 enum rtx_code comp_code = GET_CODE (comparison);
21946 if (GET_MODE_CLASS (mode) != MODE_CC)
21947 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
21948 XEXP (comparison, 1));
21950 switch (mode)
21952 case CC_DNEmode: code = ARM_NE; goto dominance;
21953 case CC_DEQmode: code = ARM_EQ; goto dominance;
21954 case CC_DGEmode: code = ARM_GE; goto dominance;
21955 case CC_DGTmode: code = ARM_GT; goto dominance;
21956 case CC_DLEmode: code = ARM_LE; goto dominance;
21957 case CC_DLTmode: code = ARM_LT; goto dominance;
21958 case CC_DGEUmode: code = ARM_CS; goto dominance;
21959 case CC_DGTUmode: code = ARM_HI; goto dominance;
21960 case CC_DLEUmode: code = ARM_LS; goto dominance;
21961 case CC_DLTUmode: code = ARM_CC;
21963 dominance:
21964 if (comp_code == EQ)
21965 return ARM_INVERSE_CONDITION_CODE (code);
21966 if (comp_code == NE)
21967 return code;
21968 return ARM_NV;
21970 case CC_NOOVmode:
21971 switch (comp_code)
21973 case NE: return ARM_NE;
21974 case EQ: return ARM_EQ;
21975 case GE: return ARM_PL;
21976 case LT: return ARM_MI;
21977 default: return ARM_NV;
21980 case CC_Zmode:
21981 switch (comp_code)
21983 case NE: return ARM_NE;
21984 case EQ: return ARM_EQ;
21985 default: return ARM_NV;
21988 case CC_Nmode:
21989 switch (comp_code)
21991 case NE: return ARM_MI;
21992 case EQ: return ARM_PL;
21993 default: return ARM_NV;
21996 case CCFPEmode:
21997 case CCFPmode:
21998 /* We can handle all cases except UNEQ and LTGT. */
21999 switch (comp_code)
22001 case GE: return ARM_GE;
22002 case GT: return ARM_GT;
22003 case LE: return ARM_LS;
22004 case LT: return ARM_MI;
22005 case NE: return ARM_NE;
22006 case EQ: return ARM_EQ;
22007 case ORDERED: return ARM_VC;
22008 case UNORDERED: return ARM_VS;
22009 case UNLT: return ARM_LT;
22010 case UNLE: return ARM_LE;
22011 case UNGT: return ARM_HI;
22012 case UNGE: return ARM_PL;
22013 /* UNEQ and LTGT do not have a representation. */
22014 case UNEQ: /* Fall through. */
22015 case LTGT: /* Fall through. */
22016 default: return ARM_NV;
22019 case CC_SWPmode:
22020 switch (comp_code)
22022 case NE: return ARM_NE;
22023 case EQ: return ARM_EQ;
22024 case GE: return ARM_LE;
22025 case GT: return ARM_LT;
22026 case LE: return ARM_GE;
22027 case LT: return ARM_GT;
22028 case GEU: return ARM_LS;
22029 case GTU: return ARM_CC;
22030 case LEU: return ARM_CS;
22031 case LTU: return ARM_HI;
22032 default: return ARM_NV;
22035 case CC_Cmode:
22036 switch (comp_code)
22038 case LTU: return ARM_CS;
22039 case GEU: return ARM_CC;
22040 default: return ARM_NV;
22043 case CC_CZmode:
22044 switch (comp_code)
22046 case NE: return ARM_NE;
22047 case EQ: return ARM_EQ;
22048 case GEU: return ARM_CS;
22049 case GTU: return ARM_HI;
22050 case LEU: return ARM_LS;
22051 case LTU: return ARM_CC;
22052 default: return ARM_NV;
22055 case CC_NCVmode:
22056 switch (comp_code)
22058 case GE: return ARM_GE;
22059 case LT: return ARM_LT;
22060 case GEU: return ARM_CS;
22061 case LTU: return ARM_CC;
22062 default: return ARM_NV;
22065 case CCmode:
22066 switch (comp_code)
22068 case NE: return ARM_NE;
22069 case EQ: return ARM_EQ;
22070 case GE: return ARM_GE;
22071 case GT: return ARM_GT;
22072 case LE: return ARM_LE;
22073 case LT: return ARM_LT;
22074 case GEU: return ARM_CS;
22075 case GTU: return ARM_HI;
22076 case LEU: return ARM_LS;
22077 case LTU: return ARM_CC;
22078 default: return ARM_NV;
22081 default: gcc_unreachable ();
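/* Example: in CC_SWPmode the operands of the original comparison were
   swapped when the flags were set, so a GT comparison of the CC register
   maps to ARM_LT in the table above.  */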
22085 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22086 static enum arm_cond_code
22087 get_arm_condition_code (rtx comparison)
22089 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22090 gcc_assert (code != ARM_NV);
22091 return code;
22094 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22095 instructions. */
22096 void
22097 thumb2_final_prescan_insn (rtx insn)
22099 rtx first_insn = insn;
22100 rtx body = PATTERN (insn);
22101 rtx predicate;
22102 enum arm_cond_code code;
22103 int n;
22104 int mask;
22105 int max;
22107 /* Maximum number of conditionally executed instructions in a block
22108 is the minimum of the two max values: the maximum allowed in an IT block
22109 and the maximum that is beneficial according to the cost model and tune. */
22110 max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ?
22111 max_insns_skipped : MAX_INSN_PER_IT_BLOCK;
22113 /* Remove the previous insn from the count of insns to be output. */
22114 if (arm_condexec_count)
22115 arm_condexec_count--;
22117 /* Nothing to do if we are already inside a conditional block. */
22118 if (arm_condexec_count)
22119 return;
22121 if (GET_CODE (body) != COND_EXEC)
22122 return;
22124 /* Conditional jumps are implemented directly. */
22125 if (JUMP_P (insn))
22126 return;
22128 predicate = COND_EXEC_TEST (body);
22129 arm_current_cc = get_arm_condition_code (predicate);
22131 n = get_attr_ce_count (insn);
22132 arm_condexec_count = 1;
22133 arm_condexec_mask = (1 << n) - 1;
22134 arm_condexec_masklen = n;
22135 /* See if subsequent instructions can be combined into the same block. */
22136 for (;;)
22138 insn = next_nonnote_insn (insn);
22140 /* Jumping into the middle of an IT block is illegal, so a label or
22141 barrier terminates the block. */
22142 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22143 break;
22145 body = PATTERN (insn);
22146 /* USE and CLOBBER aren't really insns, so just skip them. */
22147 if (GET_CODE (body) == USE
22148 || GET_CODE (body) == CLOBBER)
22149 continue;
22151 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22152 if (GET_CODE (body) != COND_EXEC)
22153 break;
22154 /* Enforce the maximum number of conditionally executed instructions in a block. */
22155 n = get_attr_ce_count (insn);
22156 if (arm_condexec_masklen + n > max)
22157 break;
22159 predicate = COND_EXEC_TEST (body);
22160 code = get_arm_condition_code (predicate);
22161 mask = (1 << n) - 1;
22162 if (arm_current_cc == code)
22163 arm_condexec_mask |= (mask << arm_condexec_masklen);
22164 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22165 break;
22167 arm_condexec_count++;
22168 arm_condexec_masklen += n;
22170 /* A jump must be the last instruction in a conditional block. */
22171 if (JUMP_P (insn))
22172 break;
22174 /* Restore recog_data (getting the attributes of other insns can
22175 destroy this array, but final.c assumes that it remains intact
22176 across this call). */
22177 extract_constrain_insn_cached (first_insn);
22180 void
22181 arm_final_prescan_insn (rtx insn)
22183 /* BODY will hold the body of INSN. */
22184 rtx body = PATTERN (insn);
22186 /* This will be 1 if trying to repeat the trick, and things need to be
22187 reversed if it appears to fail. */
22188 int reverse = 0;
22190 /* If we start with a return insn, we only succeed if we find another one. */
22191 int seeking_return = 0;
22192 enum rtx_code return_code = UNKNOWN;
22194 /* START_INSN will hold the insn from where we start looking. This is the
22195 first insn after the following code_label if REVERSE is true. */
22196 rtx start_insn = insn;
22198 /* If in state 4, check if the target branch is reached, in order to
22199 change back to state 0. */
22200 if (arm_ccfsm_state == 4)
22202 if (insn == arm_target_insn)
22204 arm_target_insn = NULL;
22205 arm_ccfsm_state = 0;
22207 return;
22210 /* If in state 3, it is possible to repeat the trick, if this insn is an
22211 unconditional branch to a label, and immediately following this branch
22212 is the previous target label which is only used once, and the label this
22213 branch jumps to is not too far off. */
22214 if (arm_ccfsm_state == 3)
22216 if (simplejump_p (insn))
22218 start_insn = next_nonnote_insn (start_insn);
22219 if (BARRIER_P (start_insn))
22221 /* XXX Isn't this always a barrier? */
22222 start_insn = next_nonnote_insn (start_insn);
22224 if (LABEL_P (start_insn)
22225 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22226 && LABEL_NUSES (start_insn) == 1)
22227 reverse = TRUE;
22228 else
22229 return;
22231 else if (ANY_RETURN_P (body))
22233 start_insn = next_nonnote_insn (start_insn);
22234 if (BARRIER_P (start_insn))
22235 start_insn = next_nonnote_insn (start_insn);
22236 if (LABEL_P (start_insn)
22237 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22238 && LABEL_NUSES (start_insn) == 1)
22240 reverse = TRUE;
22241 seeking_return = 1;
22242 return_code = GET_CODE (body);
22244 else
22245 return;
22247 else
22248 return;
22251 gcc_assert (!arm_ccfsm_state || reverse);
22252 if (!JUMP_P (insn))
22253 return;
22255 /* This jump might be paralleled with a clobber of the condition codes;
22256 the jump should always come first. */
22257 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22258 body = XVECEXP (body, 0, 0);
22260 if (reverse
22261 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22262 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22264 int insns_skipped;
22265 int fail = FALSE, succeed = FALSE;
22266 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22267 int then_not_else = TRUE;
22268 rtx this_insn = start_insn, label = 0;
22270 /* Register the insn jumped to. */
22271 if (reverse)
22273 if (!seeking_return)
22274 label = XEXP (SET_SRC (body), 0);
22276 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22277 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22278 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22280 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22281 then_not_else = FALSE;
22283 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22285 seeking_return = 1;
22286 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22288 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22290 seeking_return = 1;
22291 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22292 then_not_else = FALSE;
22294 else
22295 gcc_unreachable ();
22297 /* See how many insns this branch skips, and what kind of insns. If all
22298 insns are okay, and the label or unconditional branch to the same
22299 label is not too far away, succeed. */
22300 for (insns_skipped = 0;
22301 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22303 rtx scanbody;
22305 this_insn = next_nonnote_insn (this_insn);
22306 if (!this_insn)
22307 break;
22309 switch (GET_CODE (this_insn))
22311 case CODE_LABEL:
22312 /* Succeed if it is the target label, otherwise fail since
22313 control falls in from somewhere else. */
22314 if (this_insn == label)
22316 arm_ccfsm_state = 1;
22317 succeed = TRUE;
22319 else
22320 fail = TRUE;
22321 break;
22323 case BARRIER:
22324 /* Succeed if the following insn is the target label.
22325 Otherwise fail.
22326 If return insns are used then the last insn in a function
22327 will be a barrier. */
22328 this_insn = next_nonnote_insn (this_insn);
22329 if (this_insn && this_insn == label)
22331 arm_ccfsm_state = 1;
22332 succeed = TRUE;
22334 else
22335 fail = TRUE;
22336 break;
22338 case CALL_INSN:
22339 /* The AAPCS says that conditional calls should not be
22340 used since they make interworking inefficient (the
22341 linker can't transform BL<cond> into BLX). That's
22342 only a problem if the machine has BLX. */
22343 if (arm_arch5)
22345 fail = TRUE;
22346 break;
22349 /* Succeed if the following insn is the target label, or
22350 if the following two insns are a barrier and the
22351 target label. */
22352 this_insn = next_nonnote_insn (this_insn);
22353 if (this_insn && BARRIER_P (this_insn))
22354 this_insn = next_nonnote_insn (this_insn);
22356 if (this_insn && this_insn == label
22357 && insns_skipped < max_insns_skipped)
22359 arm_ccfsm_state = 1;
22360 succeed = TRUE;
22362 else
22363 fail = TRUE;
22364 break;
22366 case JUMP_INSN:
22367 /* If this is an unconditional branch to the same label, succeed.
22368 If it is to another label, do nothing. If it is conditional,
22369 fail. */
22370 /* XXX Probably, the tests for SET and the PC are
22371 unnecessary. */
22373 scanbody = PATTERN (this_insn);
22374 if (GET_CODE (scanbody) == SET
22375 && GET_CODE (SET_DEST (scanbody)) == PC)
22377 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22378 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22380 arm_ccfsm_state = 2;
22381 succeed = TRUE;
22383 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22384 fail = TRUE;
22386 /* Fail if a conditional return is undesirable (e.g. on a
22387 StrongARM), but still allow this if optimizing for size. */
22388 else if (GET_CODE (scanbody) == return_code
22389 && !use_return_insn (TRUE, NULL)
22390 && !optimize_size)
22391 fail = TRUE;
22392 else if (GET_CODE (scanbody) == return_code)
22394 arm_ccfsm_state = 2;
22395 succeed = TRUE;
22397 else if (GET_CODE (scanbody) == PARALLEL)
22399 switch (get_attr_conds (this_insn))
22401 case CONDS_NOCOND:
22402 break;
22403 default:
22404 fail = TRUE;
22405 break;
22408 else
22409 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22411 break;
22413 case INSN:
22414 /* Instructions using or affecting the condition codes make it
22415 fail. */
22416 scanbody = PATTERN (this_insn);
22417 if (!(GET_CODE (scanbody) == SET
22418 || GET_CODE (scanbody) == PARALLEL)
22419 || get_attr_conds (this_insn) != CONDS_NOCOND)
22420 fail = TRUE;
22421 break;
22423 default:
22424 break;
22427 if (succeed)
22429 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22430 arm_target_label = CODE_LABEL_NUMBER (label);
22431 else
22433 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22435 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22437 this_insn = next_nonnote_insn (this_insn);
22438 gcc_assert (!this_insn
22439 || (!BARRIER_P (this_insn)
22440 && !LABEL_P (this_insn)));
22442 if (!this_insn)
22444 /* Oh, dear! We ran off the end; give up. */
22445 extract_constrain_insn_cached (insn);
22446 arm_ccfsm_state = 0;
22447 arm_target_insn = NULL;
22448 return;
22450 arm_target_insn = this_insn;
22453 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22454 what it was. */
22455 if (!reverse)
22456 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22458 if (reverse || then_not_else)
22459 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22462 /* Restore recog_data (getting the attributes of other insns can
22463 destroy this array, but final.c assumes that it remains intact
22464 across this call). */
22465 extract_constrain_insn_cached (insn);
22469 /* Output IT instructions. */
22470 void
22471 thumb2_asm_output_opcode (FILE * stream)
22473 char buff[5];
22474 int n;
22476 if (arm_condexec_mask)
22478 for (n = 0; n < arm_condexec_masklen; n++)
22479 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22480 buff[n] = 0;
22481 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22482 arm_condition_codes[arm_current_cc]);
22483 arm_condexec_mask = 0;
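/* Example: for a two-instruction block whose second insn has the inverse
   condition, arm_condexec_mask has only bit 0 set and arm_condexec_masklen
   is 2, so buff is "te" and this emits, say, "ite\teq" ahead of the first
   conditional instruction.  */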
22487 /* Returns true if REGNO is a valid register
22488 for holding a quantity of type MODE. */
22490 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22492 if (GET_MODE_CLASS (mode) == MODE_CC)
22493 return (regno == CC_REGNUM
22494 || (TARGET_HARD_FLOAT && TARGET_VFP
22495 && regno == VFPCC_REGNUM));
22497 if (TARGET_THUMB1)
22498 /* For the Thumb we only allow values bigger than SImode in
22499 registers 0 - 6, so that there is always a second low
22500 register available to hold the upper part of the value.
22501 We probably ought to ensure that the register is the
22502 start of an even numbered register pair. */
22503 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22505 if (TARGET_HARD_FLOAT && TARGET_VFP
22506 && IS_VFP_REGNUM (regno))
22508 if (mode == SFmode || mode == SImode)
22509 return VFP_REGNO_OK_FOR_SINGLE (regno);
22511 if (mode == DFmode)
22512 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22514 /* VFP registers can hold HFmode values, but there is no point in
22515 putting them there unless we have hardware conversion insns. */
22516 if (mode == HFmode)
22517 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22519 if (TARGET_NEON)
22520 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22521 || (VALID_NEON_QREG_MODE (mode)
22522 && NEON_REGNO_OK_FOR_QUAD (regno))
22523 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22524 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22525 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22526 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22527 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22529 return FALSE;
22532 if (TARGET_REALLY_IWMMXT)
22534 if (IS_IWMMXT_GR_REGNUM (regno))
22535 return mode == SImode;
22537 if (IS_IWMMXT_REGNUM (regno))
22538 return VALID_IWMMXT_REG_MODE (mode);
22541 /* We allow almost any value to be stored in the general registers.
22542 Restrict doubleword quantities to even register pairs so that we can
22543 use ldrd. Do not allow very large Neon structure opaque modes in
22544 general registers; they would use too many. */
22545 if (regno <= LAST_ARM_REGNUM)
22546 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
22547 && ARM_NUM_REGS (mode) <= 4;
22549 if (regno == FRAME_POINTER_REGNUM
22550 || regno == ARG_POINTER_REGNUM)
22551 /* We only allow integers in the fake hard registers. */
22552 return GET_MODE_CLASS (mode) == MODE_INT;
22554 return FALSE;
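/* Example: with TARGET_LDRD a DImode value may live in the even pair
   r4/r5 (regno 4) but may not start at r5, so that ldrd/strd remain
   usable; OImode would need eight core registers and is rejected by the
   ARM_NUM_REGS (mode) <= 4 test.  */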
22557 /* Implement MODES_TIEABLE_P. */
22559 bool
22560 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22562 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22563 return true;
22565 /* We specifically want to allow elements of "structure" modes to
22566 be tieable to the structure. This more general condition allows
22567 other rarer situations too. */
22568 if (TARGET_NEON
22569 && (VALID_NEON_DREG_MODE (mode1)
22570 || VALID_NEON_QREG_MODE (mode1)
22571 || VALID_NEON_STRUCT_MODE (mode1))
22572 && (VALID_NEON_DREG_MODE (mode2)
22573 || VALID_NEON_QREG_MODE (mode2)
22574 || VALID_NEON_STRUCT_MODE (mode2)))
22575 return true;
22577 return false;
22580 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
22581 not used in arm mode. */
22583 enum reg_class
22584 arm_regno_class (int regno)
22586 if (TARGET_THUMB1)
22588 if (regno == STACK_POINTER_REGNUM)
22589 return STACK_REG;
22590 if (regno == CC_REGNUM)
22591 return CC_REG;
22592 if (regno < 8)
22593 return LO_REGS;
22594 return HI_REGS;
22597 if (TARGET_THUMB2 && regno < 8)
22598 return LO_REGS;
22600 if ( regno <= LAST_ARM_REGNUM
22601 || regno == FRAME_POINTER_REGNUM
22602 || regno == ARG_POINTER_REGNUM)
22603 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22605 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22606 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22608 if (IS_VFP_REGNUM (regno))
22610 if (regno <= D7_VFP_REGNUM)
22611 return VFP_D0_D7_REGS;
22612 else if (regno <= LAST_LO_VFP_REGNUM)
22613 return VFP_LO_REGS;
22614 else
22615 return VFP_HI_REGS;
22618 if (IS_IWMMXT_REGNUM (regno))
22619 return IWMMXT_REGS;
22621 if (IS_IWMMXT_GR_REGNUM (regno))
22622 return IWMMXT_GR_REGS;
22624 return NO_REGS;
22627 /* Handle a special case when computing the offset
22628 of an argument from the frame pointer. */
22630 arm_debugger_arg_offset (int value, rtx addr)
22632 rtx insn;
22634 /* We are only interested if dbxout_parms() failed to compute the offset. */
22635 if (value != 0)
22636 return 0;
22638 /* We can only cope with the case where the address is held in a register. */
22639 if (!REG_P (addr))
22640 return 0;
22642 /* If we are using the frame pointer to point at the argument, then
22643 an offset of 0 is correct. */
22644 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22645 return 0;
22647 /* If we are using the stack pointer to point at the
22648 argument, then an offset of 0 is correct. */
22649 /* ??? Check this is consistent with thumb2 frame layout. */
22650 if ((TARGET_THUMB || !frame_pointer_needed)
22651 && REGNO (addr) == SP_REGNUM)
22652 return 0;
22654 /* Oh dear. The argument is pointed to by a register rather
22655 than being held in a register, or being stored at a known
22656 offset from the frame pointer. Since GDB only understands
22657 those two kinds of argument we must translate the address
22658 held in the register into an offset from the frame pointer.
22659 We do this by searching through the insns for the function
22660 looking to see where this register gets its value. If the
22661 register is initialized from the frame pointer plus an offset
22662 then we are in luck and we can continue, otherwise we give up.
22664 This code is exercised by producing debugging information
22665 for a function with arguments like this:
22667 double func (double a, double b, int c, double d) {return d;}
22669 Without this code the stab for parameter 'd' will be set to
22670 an offset of 0 from the frame pointer, rather than 8. */
22672 /* The if() statement says:
22674 If the insn is a normal instruction
22675 and if the insn is setting the value in a register
22676 and if the register being set is the register holding the address of the argument
22677 and if the address is computed by an addition
22678 that involves adding to a register
22679 which is the frame pointer
22680 a constant integer
22682 then... */
22684 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22686 if ( NONJUMP_INSN_P (insn)
22687 && GET_CODE (PATTERN (insn)) == SET
22688 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
22689 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
22690 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
22691 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22692 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
22695 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
22697 break;
22701 if (value == 0)
22703 debug_rtx (addr);
22704 warning (0, "unable to compute real location of stacked parameter");
22705 value = 8; /* XXX magic hack */
22708 return value;
22711 typedef enum {
22712 T_V8QI,
22713 T_V4HI,
22714 T_V4HF,
22715 T_V2SI,
22716 T_V2SF,
22717 T_DI,
22718 T_V16QI,
22719 T_V8HI,
22720 T_V4SI,
22721 T_V4SF,
22722 T_V2DI,
22723 T_TI,
22724 T_EI,
22725 T_OI,
22726 T_MAX /* Size of enum. Keep last. */
22727 } neon_builtin_type_mode;
22729 #define TYPE_MODE_BIT(X) (1 << (X))
22731 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
22732 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
22733 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
22734 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
22735 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
22736 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
22738 #define v8qi_UP T_V8QI
22739 #define v4hi_UP T_V4HI
22740 #define v4hf_UP T_V4HF
22741 #define v2si_UP T_V2SI
22742 #define v2sf_UP T_V2SF
22743 #define di_UP T_DI
22744 #define v16qi_UP T_V16QI
22745 #define v8hi_UP T_V8HI
22746 #define v4si_UP T_V4SI
22747 #define v4sf_UP T_V4SF
22748 #define v2di_UP T_V2DI
22749 #define ti_UP T_TI
22750 #define ei_UP T_EI
22751 #define oi_UP T_OI
22753 #define UP(X) X##_UP
22755 typedef enum {
22756 NEON_BINOP,
22757 NEON_TERNOP,
22758 NEON_UNOP,
22759 NEON_GETLANE,
22760 NEON_SETLANE,
22761 NEON_CREATE,
22762 NEON_RINT,
22763 NEON_DUP,
22764 NEON_DUPLANE,
22765 NEON_COMBINE,
22766 NEON_SPLIT,
22767 NEON_LANEMUL,
22768 NEON_LANEMULL,
22769 NEON_LANEMULH,
22770 NEON_LANEMAC,
22771 NEON_SCALARMUL,
22772 NEON_SCALARMULL,
22773 NEON_SCALARMULH,
22774 NEON_SCALARMAC,
22775 NEON_CONVERT,
22776 NEON_FLOAT_WIDEN,
22777 NEON_FLOAT_NARROW,
22778 NEON_FIXCONV,
22779 NEON_SELECT,
22780 NEON_RESULTPAIR,
22781 NEON_REINTERP,
22782 NEON_VTBL,
22783 NEON_VTBX,
22784 NEON_LOAD1,
22785 NEON_LOAD1LANE,
22786 NEON_STORE1,
22787 NEON_STORE1LANE,
22788 NEON_LOADSTRUCT,
22789 NEON_LOADSTRUCTLANE,
22790 NEON_STORESTRUCT,
22791 NEON_STORESTRUCTLANE,
22792 NEON_LOGICBINOP,
22793 NEON_SHIFTINSERT,
22794 NEON_SHIFTIMM,
22795 NEON_SHIFTACC
22796 } neon_itype;
22798 typedef struct {
22799 const char *name;
22800 const neon_itype itype;
22801 const neon_builtin_type_mode mode;
22802 const enum insn_code code;
22803 unsigned int fcode;
22804 } neon_builtin_datum;
22806 #define CF(N,X) CODE_FOR_neon_##N##X
22808 #define VAR1(T, N, A) \
22809 {#N, NEON_##T, UP (A), CF (N, A), 0}
22810 #define VAR2(T, N, A, B) \
22811 VAR1 (T, N, A), \
22812 {#N, NEON_##T, UP (B), CF (N, B), 0}
22813 #define VAR3(T, N, A, B, C) \
22814 VAR2 (T, N, A, B), \
22815 {#N, NEON_##T, UP (C), CF (N, C), 0}
22816 #define VAR4(T, N, A, B, C, D) \
22817 VAR3 (T, N, A, B, C), \
22818 {#N, NEON_##T, UP (D), CF (N, D), 0}
22819 #define VAR5(T, N, A, B, C, D, E) \
22820 VAR4 (T, N, A, B, C, D), \
22821 {#N, NEON_##T, UP (E), CF (N, E), 0}
22822 #define VAR6(T, N, A, B, C, D, E, F) \
22823 VAR5 (T, N, A, B, C, D, E), \
22824 {#N, NEON_##T, UP (F), CF (N, F), 0}
22825 #define VAR7(T, N, A, B, C, D, E, F, G) \
22826 VAR6 (T, N, A, B, C, D, E, F), \
22827 {#N, NEON_##T, UP (G), CF (N, G), 0}
22828 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22829 VAR7 (T, N, A, B, C, D, E, F, G), \
22830 {#N, NEON_##T, UP (H), CF (N, H), 0}
22831 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22832 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22833 {#N, NEON_##T, UP (I), CF (N, I), 0}
22834 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22835 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22836 {#N, NEON_##T, UP (J), CF (N, J), 0}
22838 /* The NEON builtin data can be found in arm_neon_builtins.def.
22839 The mode entries in the following table correspond to the "key" type of the
22840 instruction variant, i.e. equivalent to that which would be specified after
22841 the assembler mnemonic, which usually refers to the last vector operand.
22842 (Signed/unsigned/polynomial types are not differentiated, though; they
22843 are all mapped onto the same mode for a given element size.) The modes
22844 listed per instruction should be the same as those defined for that
22845 instruction's pattern in neon.md. */
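/* As a purely hypothetical illustration of the VARn macros above, an
   entry such as

     VAR2 (BINOP, vadd, v8qi, v4hi)

   in arm_neon_builtins.def would expand here to the two table rows

     {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
     {"vadd", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddv4hi, 0},

   with the trailing fcode field filled in later by arm_init_neon_builtins.  */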
22847 static neon_builtin_datum neon_builtin_data[] =
22849 #include "arm_neon_builtins.def"
22852 #undef CF
22853 #undef VAR1
22854 #undef VAR2
22855 #undef VAR3
22856 #undef VAR4
22857 #undef VAR5
22858 #undef VAR6
22859 #undef VAR7
22860 #undef VAR8
22861 #undef VAR9
22862 #undef VAR10
22864 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
22865 #define VAR1(T, N, A) \
22866 CF (N, A)
22867 #define VAR2(T, N, A, B) \
22868 VAR1 (T, N, A), \
22869 CF (N, B)
22870 #define VAR3(T, N, A, B, C) \
22871 VAR2 (T, N, A, B), \
22872 CF (N, C)
22873 #define VAR4(T, N, A, B, C, D) \
22874 VAR3 (T, N, A, B, C), \
22875 CF (N, D)
22876 #define VAR5(T, N, A, B, C, D, E) \
22877 VAR4 (T, N, A, B, C, D), \
22878 CF (N, E)
22879 #define VAR6(T, N, A, B, C, D, E, F) \
22880 VAR5 (T, N, A, B, C, D, E), \
22881 CF (N, F)
22882 #define VAR7(T, N, A, B, C, D, E, F, G) \
22883 VAR6 (T, N, A, B, C, D, E, F), \
22884 CF (N, G)
22885 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22886 VAR7 (T, N, A, B, C, D, E, F, G), \
22887 CF (N, H)
22888 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22889 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22890 CF (N, I)
22891 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22892 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22893 CF (N, J)
22894 enum arm_builtins
22896 ARM_BUILTIN_GETWCGR0,
22897 ARM_BUILTIN_GETWCGR1,
22898 ARM_BUILTIN_GETWCGR2,
22899 ARM_BUILTIN_GETWCGR3,
22901 ARM_BUILTIN_SETWCGR0,
22902 ARM_BUILTIN_SETWCGR1,
22903 ARM_BUILTIN_SETWCGR2,
22904 ARM_BUILTIN_SETWCGR3,
22906 ARM_BUILTIN_WZERO,
22908 ARM_BUILTIN_WAVG2BR,
22909 ARM_BUILTIN_WAVG2HR,
22910 ARM_BUILTIN_WAVG2B,
22911 ARM_BUILTIN_WAVG2H,
22913 ARM_BUILTIN_WACCB,
22914 ARM_BUILTIN_WACCH,
22915 ARM_BUILTIN_WACCW,
22917 ARM_BUILTIN_WMACS,
22918 ARM_BUILTIN_WMACSZ,
22919 ARM_BUILTIN_WMACU,
22920 ARM_BUILTIN_WMACUZ,
22922 ARM_BUILTIN_WSADB,
22923 ARM_BUILTIN_WSADBZ,
22924 ARM_BUILTIN_WSADH,
22925 ARM_BUILTIN_WSADHZ,
22927 ARM_BUILTIN_WALIGNI,
22928 ARM_BUILTIN_WALIGNR0,
22929 ARM_BUILTIN_WALIGNR1,
22930 ARM_BUILTIN_WALIGNR2,
22931 ARM_BUILTIN_WALIGNR3,
22933 ARM_BUILTIN_TMIA,
22934 ARM_BUILTIN_TMIAPH,
22935 ARM_BUILTIN_TMIABB,
22936 ARM_BUILTIN_TMIABT,
22937 ARM_BUILTIN_TMIATB,
22938 ARM_BUILTIN_TMIATT,
22940 ARM_BUILTIN_TMOVMSKB,
22941 ARM_BUILTIN_TMOVMSKH,
22942 ARM_BUILTIN_TMOVMSKW,
22944 ARM_BUILTIN_TBCSTB,
22945 ARM_BUILTIN_TBCSTH,
22946 ARM_BUILTIN_TBCSTW,
22948 ARM_BUILTIN_WMADDS,
22949 ARM_BUILTIN_WMADDU,
22951 ARM_BUILTIN_WPACKHSS,
22952 ARM_BUILTIN_WPACKWSS,
22953 ARM_BUILTIN_WPACKDSS,
22954 ARM_BUILTIN_WPACKHUS,
22955 ARM_BUILTIN_WPACKWUS,
22956 ARM_BUILTIN_WPACKDUS,
22958 ARM_BUILTIN_WADDB,
22959 ARM_BUILTIN_WADDH,
22960 ARM_BUILTIN_WADDW,
22961 ARM_BUILTIN_WADDSSB,
22962 ARM_BUILTIN_WADDSSH,
22963 ARM_BUILTIN_WADDSSW,
22964 ARM_BUILTIN_WADDUSB,
22965 ARM_BUILTIN_WADDUSH,
22966 ARM_BUILTIN_WADDUSW,
22967 ARM_BUILTIN_WSUBB,
22968 ARM_BUILTIN_WSUBH,
22969 ARM_BUILTIN_WSUBW,
22970 ARM_BUILTIN_WSUBSSB,
22971 ARM_BUILTIN_WSUBSSH,
22972 ARM_BUILTIN_WSUBSSW,
22973 ARM_BUILTIN_WSUBUSB,
22974 ARM_BUILTIN_WSUBUSH,
22975 ARM_BUILTIN_WSUBUSW,
22977 ARM_BUILTIN_WAND,
22978 ARM_BUILTIN_WANDN,
22979 ARM_BUILTIN_WOR,
22980 ARM_BUILTIN_WXOR,
22982 ARM_BUILTIN_WCMPEQB,
22983 ARM_BUILTIN_WCMPEQH,
22984 ARM_BUILTIN_WCMPEQW,
22985 ARM_BUILTIN_WCMPGTUB,
22986 ARM_BUILTIN_WCMPGTUH,
22987 ARM_BUILTIN_WCMPGTUW,
22988 ARM_BUILTIN_WCMPGTSB,
22989 ARM_BUILTIN_WCMPGTSH,
22990 ARM_BUILTIN_WCMPGTSW,
22992 ARM_BUILTIN_TEXTRMSB,
22993 ARM_BUILTIN_TEXTRMSH,
22994 ARM_BUILTIN_TEXTRMSW,
22995 ARM_BUILTIN_TEXTRMUB,
22996 ARM_BUILTIN_TEXTRMUH,
22997 ARM_BUILTIN_TEXTRMUW,
22998 ARM_BUILTIN_TINSRB,
22999 ARM_BUILTIN_TINSRH,
23000 ARM_BUILTIN_TINSRW,
23002 ARM_BUILTIN_WMAXSW,
23003 ARM_BUILTIN_WMAXSH,
23004 ARM_BUILTIN_WMAXSB,
23005 ARM_BUILTIN_WMAXUW,
23006 ARM_BUILTIN_WMAXUH,
23007 ARM_BUILTIN_WMAXUB,
23008 ARM_BUILTIN_WMINSW,
23009 ARM_BUILTIN_WMINSH,
23010 ARM_BUILTIN_WMINSB,
23011 ARM_BUILTIN_WMINUW,
23012 ARM_BUILTIN_WMINUH,
23013 ARM_BUILTIN_WMINUB,
23015 ARM_BUILTIN_WMULUM,
23016 ARM_BUILTIN_WMULSM,
23017 ARM_BUILTIN_WMULUL,
23019 ARM_BUILTIN_PSADBH,
23020 ARM_BUILTIN_WSHUFH,
23022 ARM_BUILTIN_WSLLH,
23023 ARM_BUILTIN_WSLLW,
23024 ARM_BUILTIN_WSLLD,
23025 ARM_BUILTIN_WSRAH,
23026 ARM_BUILTIN_WSRAW,
23027 ARM_BUILTIN_WSRAD,
23028 ARM_BUILTIN_WSRLH,
23029 ARM_BUILTIN_WSRLW,
23030 ARM_BUILTIN_WSRLD,
23031 ARM_BUILTIN_WRORH,
23032 ARM_BUILTIN_WRORW,
23033 ARM_BUILTIN_WRORD,
23034 ARM_BUILTIN_WSLLHI,
23035 ARM_BUILTIN_WSLLWI,
23036 ARM_BUILTIN_WSLLDI,
23037 ARM_BUILTIN_WSRAHI,
23038 ARM_BUILTIN_WSRAWI,
23039 ARM_BUILTIN_WSRADI,
23040 ARM_BUILTIN_WSRLHI,
23041 ARM_BUILTIN_WSRLWI,
23042 ARM_BUILTIN_WSRLDI,
23043 ARM_BUILTIN_WRORHI,
23044 ARM_BUILTIN_WRORWI,
23045 ARM_BUILTIN_WRORDI,
23047 ARM_BUILTIN_WUNPCKIHB,
23048 ARM_BUILTIN_WUNPCKIHH,
23049 ARM_BUILTIN_WUNPCKIHW,
23050 ARM_BUILTIN_WUNPCKILB,
23051 ARM_BUILTIN_WUNPCKILH,
23052 ARM_BUILTIN_WUNPCKILW,
23054 ARM_BUILTIN_WUNPCKEHSB,
23055 ARM_BUILTIN_WUNPCKEHSH,
23056 ARM_BUILTIN_WUNPCKEHSW,
23057 ARM_BUILTIN_WUNPCKEHUB,
23058 ARM_BUILTIN_WUNPCKEHUH,
23059 ARM_BUILTIN_WUNPCKEHUW,
23060 ARM_BUILTIN_WUNPCKELSB,
23061 ARM_BUILTIN_WUNPCKELSH,
23062 ARM_BUILTIN_WUNPCKELSW,
23063 ARM_BUILTIN_WUNPCKELUB,
23064 ARM_BUILTIN_WUNPCKELUH,
23065 ARM_BUILTIN_WUNPCKELUW,
23067 ARM_BUILTIN_WABSB,
23068 ARM_BUILTIN_WABSH,
23069 ARM_BUILTIN_WABSW,
23071 ARM_BUILTIN_WADDSUBHX,
23072 ARM_BUILTIN_WSUBADDHX,
23074 ARM_BUILTIN_WABSDIFFB,
23075 ARM_BUILTIN_WABSDIFFH,
23076 ARM_BUILTIN_WABSDIFFW,
23078 ARM_BUILTIN_WADDCH,
23079 ARM_BUILTIN_WADDCW,
23081 ARM_BUILTIN_WAVG4,
23082 ARM_BUILTIN_WAVG4R,
23084 ARM_BUILTIN_WMADDSX,
23085 ARM_BUILTIN_WMADDUX,
23087 ARM_BUILTIN_WMADDSN,
23088 ARM_BUILTIN_WMADDUN,
23090 ARM_BUILTIN_WMULWSM,
23091 ARM_BUILTIN_WMULWUM,
23093 ARM_BUILTIN_WMULWSMR,
23094 ARM_BUILTIN_WMULWUMR,
23096 ARM_BUILTIN_WMULWL,
23098 ARM_BUILTIN_WMULSMR,
23099 ARM_BUILTIN_WMULUMR,
23101 ARM_BUILTIN_WQMULM,
23102 ARM_BUILTIN_WQMULMR,
23104 ARM_BUILTIN_WQMULWM,
23105 ARM_BUILTIN_WQMULWMR,
23107 ARM_BUILTIN_WADDBHUSM,
23108 ARM_BUILTIN_WADDBHUSL,
23110 ARM_BUILTIN_WQMIABB,
23111 ARM_BUILTIN_WQMIABT,
23112 ARM_BUILTIN_WQMIATB,
23113 ARM_BUILTIN_WQMIATT,
23115 ARM_BUILTIN_WQMIABBN,
23116 ARM_BUILTIN_WQMIABTN,
23117 ARM_BUILTIN_WQMIATBN,
23118 ARM_BUILTIN_WQMIATTN,
23120 ARM_BUILTIN_WMIABB,
23121 ARM_BUILTIN_WMIABT,
23122 ARM_BUILTIN_WMIATB,
23123 ARM_BUILTIN_WMIATT,
23125 ARM_BUILTIN_WMIABBN,
23126 ARM_BUILTIN_WMIABTN,
23127 ARM_BUILTIN_WMIATBN,
23128 ARM_BUILTIN_WMIATTN,
23130 ARM_BUILTIN_WMIAWBB,
23131 ARM_BUILTIN_WMIAWBT,
23132 ARM_BUILTIN_WMIAWTB,
23133 ARM_BUILTIN_WMIAWTT,
23135 ARM_BUILTIN_WMIAWBBN,
23136 ARM_BUILTIN_WMIAWBTN,
23137 ARM_BUILTIN_WMIAWTBN,
23138 ARM_BUILTIN_WMIAWTTN,
23140 ARM_BUILTIN_WMERGE,
23142 #include "arm_neon_builtins.def"
23144 ,ARM_BUILTIN_MAX
23147 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23149 #undef CF
23150 #undef VAR1
23151 #undef VAR2
23152 #undef VAR3
23153 #undef VAR4
23154 #undef VAR5
23155 #undef VAR6
23156 #undef VAR7
23157 #undef VAR8
23158 #undef VAR9
23159 #undef VAR10
23161 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23163 static void
23164 arm_init_neon_builtins (void)
23166 unsigned int i, fcode;
23167 tree decl;
23169 tree neon_intQI_type_node;
23170 tree neon_intHI_type_node;
23171 tree neon_floatHF_type_node;
23172 tree neon_polyQI_type_node;
23173 tree neon_polyHI_type_node;
23174 tree neon_intSI_type_node;
23175 tree neon_intDI_type_node;
23176 tree neon_float_type_node;
23178 tree intQI_pointer_node;
23179 tree intHI_pointer_node;
23180 tree intSI_pointer_node;
23181 tree intDI_pointer_node;
23182 tree float_pointer_node;
23184 tree const_intQI_node;
23185 tree const_intHI_node;
23186 tree const_intSI_node;
23187 tree const_intDI_node;
23188 tree const_float_node;
23190 tree const_intQI_pointer_node;
23191 tree const_intHI_pointer_node;
23192 tree const_intSI_pointer_node;
23193 tree const_intDI_pointer_node;
23194 tree const_float_pointer_node;
23196 tree V8QI_type_node;
23197 tree V4HI_type_node;
23198 tree V4HF_type_node;
23199 tree V2SI_type_node;
23200 tree V2SF_type_node;
23201 tree V16QI_type_node;
23202 tree V8HI_type_node;
23203 tree V4SI_type_node;
23204 tree V4SF_type_node;
23205 tree V2DI_type_node;
23207 tree intUQI_type_node;
23208 tree intUHI_type_node;
23209 tree intUSI_type_node;
23210 tree intUDI_type_node;
23212 tree intEI_type_node;
23213 tree intOI_type_node;
23214 tree intCI_type_node;
23215 tree intXI_type_node;
23217 tree V8QI_pointer_node;
23218 tree V4HI_pointer_node;
23219 tree V2SI_pointer_node;
23220 tree V2SF_pointer_node;
23221 tree V16QI_pointer_node;
23222 tree V8HI_pointer_node;
23223 tree V4SI_pointer_node;
23224 tree V4SF_pointer_node;
23225 tree V2DI_pointer_node;
23227 tree void_ftype_pv8qi_v8qi_v8qi;
23228 tree void_ftype_pv4hi_v4hi_v4hi;
23229 tree void_ftype_pv2si_v2si_v2si;
23230 tree void_ftype_pv2sf_v2sf_v2sf;
23231 tree void_ftype_pdi_di_di;
23232 tree void_ftype_pv16qi_v16qi_v16qi;
23233 tree void_ftype_pv8hi_v8hi_v8hi;
23234 tree void_ftype_pv4si_v4si_v4si;
23235 tree void_ftype_pv4sf_v4sf_v4sf;
23236 tree void_ftype_pv2di_v2di_v2di;
23238 tree reinterp_ftype_dreg[5][5];
23239 tree reinterp_ftype_qreg[5][5];
23240 tree dreg_types[5], qreg_types[5];
23242 /* Create distinguished type nodes for NEON vector element types,
23243 and pointers to values of such types, so we can detect them later. */
23244 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23245 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23246 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23247 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23248 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23249 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23250 neon_float_type_node = make_node (REAL_TYPE);
23251 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23252 layout_type (neon_float_type_node);
23253 neon_floatHF_type_node = make_node (REAL_TYPE);
23254 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23255 layout_type (neon_floatHF_type_node);
23257 /* Define typedefs which exactly correspond to the modes we are basing vector
23258 types on. If you change these names you'll need to change
23259 the table used by arm_mangle_type too. */
23260 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23261 "__builtin_neon_qi");
23262 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23263 "__builtin_neon_hi");
23264 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23265 "__builtin_neon_hf");
23266 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23267 "__builtin_neon_si");
23268 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23269 "__builtin_neon_sf");
23270 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23271 "__builtin_neon_di");
23272 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23273 "__builtin_neon_poly8");
23274 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23275 "__builtin_neon_poly16");
23277 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23278 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23279 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23280 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23281 float_pointer_node = build_pointer_type (neon_float_type_node);
23283 /* Next create constant-qualified versions of the above types. */
23284 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23285 TYPE_QUAL_CONST);
23286 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23287 TYPE_QUAL_CONST);
23288 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23289 TYPE_QUAL_CONST);
23290 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23291 TYPE_QUAL_CONST);
23292 const_float_node = build_qualified_type (neon_float_type_node,
23293 TYPE_QUAL_CONST);
23295 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23296 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23297 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23298 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23299 const_float_pointer_node = build_pointer_type (const_float_node);
23301 /* Now create vector types based on our NEON element types. */
23302 /* 64-bit vectors. */
23303 V8QI_type_node =
23304 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23305 V4HI_type_node =
23306 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23307 V4HF_type_node =
23308 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23309 V2SI_type_node =
23310 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23311 V2SF_type_node =
23312 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23313 /* 128-bit vectors. */
23314 V16QI_type_node =
23315 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23316 V8HI_type_node =
23317 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23318 V4SI_type_node =
23319 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23320 V4SF_type_node =
23321 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23322 V2DI_type_node =
23323 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23325 /* Unsigned integer types for various mode sizes. */
23326 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23327 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23328 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23329 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23331 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23332 "__builtin_neon_uqi");
23333 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23334 "__builtin_neon_uhi");
23335 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23336 "__builtin_neon_usi");
23337 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23338 "__builtin_neon_udi");
23340 /* Opaque integer types for structures of vectors. */
23341 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23342 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23343 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23344 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23346 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23347 "__builtin_neon_ti");
23348 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23349 "__builtin_neon_ei");
23350 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23351 "__builtin_neon_oi");
23352 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23353 "__builtin_neon_ci");
23354 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23355 "__builtin_neon_xi");
23357 /* Pointers to vector types. */
23358 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
23359 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
23360 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
23361 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
23362 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
23363 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
23364 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
23365 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
23366 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
23368 /* Operations which return results as pairs. */
23369 void_ftype_pv8qi_v8qi_v8qi =
23370 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
23371 V8QI_type_node, NULL);
23372 void_ftype_pv4hi_v4hi_v4hi =
23373 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
23374 V4HI_type_node, NULL);
23375 void_ftype_pv2si_v2si_v2si =
23376 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
23377 V2SI_type_node, NULL);
23378 void_ftype_pv2sf_v2sf_v2sf =
23379 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
23380 V2SF_type_node, NULL);
23381 void_ftype_pdi_di_di =
23382 build_function_type_list (void_type_node, intDI_pointer_node,
23383 neon_intDI_type_node, neon_intDI_type_node, NULL);
23384 void_ftype_pv16qi_v16qi_v16qi =
23385 build_function_type_list (void_type_node, V16QI_pointer_node,
23386 V16QI_type_node, V16QI_type_node, NULL);
23387 void_ftype_pv8hi_v8hi_v8hi =
23388 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
23389 V8HI_type_node, NULL);
23390 void_ftype_pv4si_v4si_v4si =
23391 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
23392 V4SI_type_node, NULL);
23393 void_ftype_pv4sf_v4sf_v4sf =
23394 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
23395 V4SF_type_node, NULL);
23396 void_ftype_pv2di_v2di_v2di =
23397 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
23398 V2DI_type_node, NULL);
23400 dreg_types[0] = V8QI_type_node;
23401 dreg_types[1] = V4HI_type_node;
23402 dreg_types[2] = V2SI_type_node;
23403 dreg_types[3] = V2SF_type_node;
23404 dreg_types[4] = neon_intDI_type_node;
23406 qreg_types[0] = V16QI_type_node;
23407 qreg_types[1] = V8HI_type_node;
23408 qreg_types[2] = V4SI_type_node;
23409 qreg_types[3] = V4SF_type_node;
23410 qreg_types[4] = V2DI_type_node;
23412 for (i = 0; i < 5; i++)
23414 int j;
23415 for (j = 0; j < 5; j++)
23417 reinterp_ftype_dreg[i][j]
23418 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23419 reinterp_ftype_qreg[i][j]
23420 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23424 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23425 i < ARRAY_SIZE (neon_builtin_data);
23426 i++, fcode++)
23428 neon_builtin_datum *d = &neon_builtin_data[i];
23430 const char* const modenames[] = {
23431 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23432 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23433 "ti", "ei", "oi"
23435 char namebuf[60];
23436 tree ftype = NULL;
23437 int is_load = 0, is_store = 0;
23439 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23441 d->fcode = fcode;
23443 switch (d->itype)
23445 case NEON_LOAD1:
23446 case NEON_LOAD1LANE:
23447 case NEON_LOADSTRUCT:
23448 case NEON_LOADSTRUCTLANE:
23449 is_load = 1;
23450 /* Fall through. */
23451 case NEON_STORE1:
23452 case NEON_STORE1LANE:
23453 case NEON_STORESTRUCT:
23454 case NEON_STORESTRUCTLANE:
23455 if (!is_load)
23456 is_store = 1;
23457 /* Fall through. */
23458 case NEON_UNOP:
23459 case NEON_RINT:
23460 case NEON_BINOP:
23461 case NEON_LOGICBINOP:
23462 case NEON_SHIFTINSERT:
23463 case NEON_TERNOP:
23464 case NEON_GETLANE:
23465 case NEON_SETLANE:
23466 case NEON_CREATE:
23467 case NEON_DUP:
23468 case NEON_DUPLANE:
23469 case NEON_SHIFTIMM:
23470 case NEON_SHIFTACC:
23471 case NEON_COMBINE:
23472 case NEON_SPLIT:
23473 case NEON_CONVERT:
23474 case NEON_FIXCONV:
23475 case NEON_LANEMUL:
23476 case NEON_LANEMULL:
23477 case NEON_LANEMULH:
23478 case NEON_LANEMAC:
23479 case NEON_SCALARMUL:
23480 case NEON_SCALARMULL:
23481 case NEON_SCALARMULH:
23482 case NEON_SCALARMAC:
23483 case NEON_SELECT:
23484 case NEON_VTBL:
23485 case NEON_VTBX:
23487 int k;
23488 tree return_type = void_type_node, args = void_list_node;
23490 /* Build a function type directly from the insn_data for
23491 this builtin. The build_function_type() function takes
23492 care of removing duplicates for us. */
23493 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23495 tree eltype;
23497 if (is_load && k == 1)
23499 /* Neon load patterns always have the memory
23500 operand in the operand 1 position. */
23501 gcc_assert (insn_data[d->code].operand[k].predicate
23502 == neon_struct_operand);
23504 switch (d->mode)
23506 case T_V8QI:
23507 case T_V16QI:
23508 eltype = const_intQI_pointer_node;
23509 break;
23511 case T_V4HI:
23512 case T_V8HI:
23513 eltype = const_intHI_pointer_node;
23514 break;
23516 case T_V2SI:
23517 case T_V4SI:
23518 eltype = const_intSI_pointer_node;
23519 break;
23521 case T_V2SF:
23522 case T_V4SF:
23523 eltype = const_float_pointer_node;
23524 break;
23526 case T_DI:
23527 case T_V2DI:
23528 eltype = const_intDI_pointer_node;
23529 break;
23531 default: gcc_unreachable ();
23534 else if (is_store && k == 0)
23536 /* Similarly, Neon store patterns use operand 0 as
23537 the memory location to store to. */
23538 gcc_assert (insn_data[d->code].operand[k].predicate
23539 == neon_struct_operand);
23541 switch (d->mode)
23543 case T_V8QI:
23544 case T_V16QI:
23545 eltype = intQI_pointer_node;
23546 break;
23548 case T_V4HI:
23549 case T_V8HI:
23550 eltype = intHI_pointer_node;
23551 break;
23553 case T_V2SI:
23554 case T_V4SI:
23555 eltype = intSI_pointer_node;
23556 break;
23558 case T_V2SF:
23559 case T_V4SF:
23560 eltype = float_pointer_node;
23561 break;
23563 case T_DI:
23564 case T_V2DI:
23565 eltype = intDI_pointer_node;
23566 break;
23568 default: gcc_unreachable ();
23571 else
23573 switch (insn_data[d->code].operand[k].mode)
23575 case VOIDmode: eltype = void_type_node; break;
23576 /* Scalars. */
23577 case QImode: eltype = neon_intQI_type_node; break;
23578 case HImode: eltype = neon_intHI_type_node; break;
23579 case SImode: eltype = neon_intSI_type_node; break;
23580 case SFmode: eltype = neon_float_type_node; break;
23581 case DImode: eltype = neon_intDI_type_node; break;
23582 case TImode: eltype = intTI_type_node; break;
23583 case EImode: eltype = intEI_type_node; break;
23584 case OImode: eltype = intOI_type_node; break;
23585 case CImode: eltype = intCI_type_node; break;
23586 case XImode: eltype = intXI_type_node; break;
23587 /* 64-bit vectors. */
23588 case V8QImode: eltype = V8QI_type_node; break;
23589 case V4HImode: eltype = V4HI_type_node; break;
23590 case V2SImode: eltype = V2SI_type_node; break;
23591 case V2SFmode: eltype = V2SF_type_node; break;
23592 /* 128-bit vectors. */
23593 case V16QImode: eltype = V16QI_type_node; break;
23594 case V8HImode: eltype = V8HI_type_node; break;
23595 case V4SImode: eltype = V4SI_type_node; break;
23596 case V4SFmode: eltype = V4SF_type_node; break;
23597 case V2DImode: eltype = V2DI_type_node; break;
23598 default: gcc_unreachable ();
23602 if (k == 0 && !is_store)
23603 return_type = eltype;
23604 else
23605 args = tree_cons (NULL_TREE, eltype, args);
23608 ftype = build_function_type (return_type, args);
23610 break;
23612 case NEON_RESULTPAIR:
23614 switch (insn_data[d->code].operand[1].mode)
23616 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
23617 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
23618 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
23619 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
23620 case DImode: ftype = void_ftype_pdi_di_di; break;
23621 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
23622 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
23623 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
23624 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
23625 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
23626 default: gcc_unreachable ();
23629 break;
23631 case NEON_REINTERP:
23633 /* We iterate over 5 doubleword types, then 5 quadword
23634 types. V4HF is not a type used in reinterpret, so we translate
23635 d->mode to the correct index in reinterp_ftype_dreg. */
23636 int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5;
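      /* Worked example (assuming the T_* enumeration follows the modenames
         ordering used below): T_V2SI is index 3, one past T_V4HF, so
         rhs = (3 - 1) % 5 = 2, selecting the V2SI column of the table;
         a quadword mode such as T_V16QI (index 6) gives rhs = 5 % 5 = 0,
         the V16QI/V8QI column.  */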
23637 switch (insn_data[d->code].operand[0].mode)
23639 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
23640 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
23641 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
23642 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
23643 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
23644 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
23645 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
23646 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
23647 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
23648 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
23649 default: gcc_unreachable ();
23652 break;
23653 case NEON_FLOAT_WIDEN:
23655 tree eltype = NULL_TREE;
23656 tree return_type = NULL_TREE;
23658 switch (insn_data[d->code].operand[1].mode)
23660 case V4HFmode:
23661 eltype = V4HF_type_node;
23662 return_type = V4SF_type_node;
23663 break;
23664 default: gcc_unreachable ();
23666 ftype = build_function_type_list (return_type, eltype, NULL);
23667 break;
23669 case NEON_FLOAT_NARROW:
23671 tree eltype = NULL_TREE;
23672 tree return_type = NULL_TREE;
23674 switch (insn_data[d->code].operand[1].mode)
23676 case V4SFmode:
23677 eltype = V4SF_type_node;
23678 return_type = V4HF_type_node;
23679 break;
23680 default: gcc_unreachable ();
23682 ftype = build_function_type_list (return_type, eltype, NULL);
23683 break;
23685 default:
23686 gcc_unreachable ();
23689 gcc_assert (ftype != NULL);
23691 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
23693 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
23694 NULL_TREE);
23695 arm_builtin_decls[fcode] = decl;
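      /* Illustrative sketch of the naming scheme (actual names depend on the
         entries in neon_builtin_data): a "vadd" entry expanded for T_V8QI is
         registered as "__builtin_neon_vaddv8qi", which arm_neon.h in turn
         wraps in the user-visible vadd_s8/vadd_u8 intrinsics.  */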
23699 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
23700 do \
23702 if ((MASK) & insn_flags) \
23704 tree bdecl; \
23705 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
23706 BUILT_IN_MD, NULL, NULL_TREE); \
23707 arm_builtin_decls[CODE] = bdecl; \
23710 while (0)
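/* Usage sketch: def_mbuiltin only registers a builtin when the requested
   feature bit is present in insn_flags.  For example,

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcgr0",
                   int_ftype_void, ARM_BUILTIN_GETWCGR0);

   expands to an add_builtin_function call guarded by
   (FL_IWMMXT & insn_flags), so the builtin simply does not exist when
   compiling for a core without iWMMXt.  */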
23712 struct builtin_description
23714 const unsigned int mask;
23715 const enum insn_code icode;
23716 const char * const name;
23717 const enum arm_builtins code;
23718 const enum rtx_code comparison;
23719 const unsigned int flag;
23722 static const struct builtin_description bdesc_2arg[] =
23724 #define IWMMXT_BUILTIN(code, string, builtin) \
23725 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
23726 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23728 #define IWMMXT2_BUILTIN(code, string, builtin) \
23729 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
23730 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23732 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
23733 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
23734 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
23735 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
23736 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
23737 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
23738 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
23739 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
23740 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
23741 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
23742 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
23743 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
23744 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
23745 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
23746 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
23747 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
23748 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
23749 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
23750 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
23751 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
23752 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
23753 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
23754 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
23755 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
23756 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
23757 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
23758 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
23759 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
23760 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
23761 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
23762 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
23763 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
23764 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
23765 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
23766 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
23767 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
23768 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
23769 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
23770 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
23771 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
23772 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
23773 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
23774 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
23775 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
23776 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
23777 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
23778 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
23779 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
23780 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
23781 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
23782 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
23783 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
23784 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
23785 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
23786 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
23787 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
23788 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
23789 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
23790 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
23791 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
23792 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
23793 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
23794 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
23795 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
23796 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
23797 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
23798 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
23799 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
23800 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
23801 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
23802 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
23803 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
23804 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
23805 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
23806 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
23807 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
23808 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
23809 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
23811 #define IWMMXT_BUILTIN2(code, builtin) \
23812 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23814 #define IWMMXT2_BUILTIN2(code, builtin) \
23815 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23817 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
23818 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
23819 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
23820 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
23821 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
23822 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
23823 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
23824 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
23825 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
23826 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
23829 static const struct builtin_description bdesc_1arg[] =
23831 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
23832 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
23833 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
23834 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
23835 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
23836 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
23837 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
23838 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
23839 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
23840 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
23841 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
23842 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
23843 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
23844 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
23845 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
23846 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
23847 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
23848 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
23849 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
23850 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
23851 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
23852 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
23853 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
23854 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
23857 /* Set up all the iWMMXt builtins. This is not called if
23858 TARGET_IWMMXT is zero. */
23860 static void
23861 arm_init_iwmmxt_builtins (void)
23863 const struct builtin_description * d;
23864 size_t i;
23866 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
23867 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
23868 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
23870 tree v8qi_ftype_v8qi_v8qi_int
23871 = build_function_type_list (V8QI_type_node,
23872 V8QI_type_node, V8QI_type_node,
23873 integer_type_node, NULL_TREE);
23874 tree v4hi_ftype_v4hi_int
23875 = build_function_type_list (V4HI_type_node,
23876 V4HI_type_node, integer_type_node, NULL_TREE);
23877 tree v2si_ftype_v2si_int
23878 = build_function_type_list (V2SI_type_node,
23879 V2SI_type_node, integer_type_node, NULL_TREE);
23880 tree v2si_ftype_di_di
23881 = build_function_type_list (V2SI_type_node,
23882 long_long_integer_type_node,
23883 long_long_integer_type_node,
23884 NULL_TREE);
23885 tree di_ftype_di_int
23886 = build_function_type_list (long_long_integer_type_node,
23887 long_long_integer_type_node,
23888 integer_type_node, NULL_TREE);
23889 tree di_ftype_di_int_int
23890 = build_function_type_list (long_long_integer_type_node,
23891 long_long_integer_type_node,
23892 integer_type_node,
23893 integer_type_node, NULL_TREE);
23894 tree int_ftype_v8qi
23895 = build_function_type_list (integer_type_node,
23896 V8QI_type_node, NULL_TREE);
23897 tree int_ftype_v4hi
23898 = build_function_type_list (integer_type_node,
23899 V4HI_type_node, NULL_TREE);
23900 tree int_ftype_v2si
23901 = build_function_type_list (integer_type_node,
23902 V2SI_type_node, NULL_TREE);
23903 tree int_ftype_v8qi_int
23904 = build_function_type_list (integer_type_node,
23905 V8QI_type_node, integer_type_node, NULL_TREE);
23906 tree int_ftype_v4hi_int
23907 = build_function_type_list (integer_type_node,
23908 V4HI_type_node, integer_type_node, NULL_TREE);
23909 tree int_ftype_v2si_int
23910 = build_function_type_list (integer_type_node,
23911 V2SI_type_node, integer_type_node, NULL_TREE);
23912 tree v8qi_ftype_v8qi_int_int
23913 = build_function_type_list (V8QI_type_node,
23914 V8QI_type_node, integer_type_node,
23915 integer_type_node, NULL_TREE);
23916 tree v4hi_ftype_v4hi_int_int
23917 = build_function_type_list (V4HI_type_node,
23918 V4HI_type_node, integer_type_node,
23919 integer_type_node, NULL_TREE);
23920 tree v2si_ftype_v2si_int_int
23921 = build_function_type_list (V2SI_type_node,
23922 V2SI_type_node, integer_type_node,
23923 integer_type_node, NULL_TREE);
23924 /* Miscellaneous. */
23925 tree v8qi_ftype_v4hi_v4hi
23926 = build_function_type_list (V8QI_type_node,
23927 V4HI_type_node, V4HI_type_node, NULL_TREE);
23928 tree v4hi_ftype_v2si_v2si
23929 = build_function_type_list (V4HI_type_node,
23930 V2SI_type_node, V2SI_type_node, NULL_TREE);
23931 tree v8qi_ftype_v4hi_v8qi
23932 = build_function_type_list (V8QI_type_node,
23933 V4HI_type_node, V8QI_type_node, NULL_TREE);
23934 tree v2si_ftype_v4hi_v4hi
23935 = build_function_type_list (V2SI_type_node,
23936 V4HI_type_node, V4HI_type_node, NULL_TREE);
23937 tree v2si_ftype_v8qi_v8qi
23938 = build_function_type_list (V2SI_type_node,
23939 V8QI_type_node, V8QI_type_node, NULL_TREE);
23940 tree v4hi_ftype_v4hi_di
23941 = build_function_type_list (V4HI_type_node,
23942 V4HI_type_node, long_long_integer_type_node,
23943 NULL_TREE);
23944 tree v2si_ftype_v2si_di
23945 = build_function_type_list (V2SI_type_node,
23946 V2SI_type_node, long_long_integer_type_node,
23947 NULL_TREE);
23948 tree di_ftype_void
23949 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
23950 tree int_ftype_void
23951 = build_function_type_list (integer_type_node, NULL_TREE);
23952 tree di_ftype_v8qi
23953 = build_function_type_list (long_long_integer_type_node,
23954 V8QI_type_node, NULL_TREE);
23955 tree di_ftype_v4hi
23956 = build_function_type_list (long_long_integer_type_node,
23957 V4HI_type_node, NULL_TREE);
23958 tree di_ftype_v2si
23959 = build_function_type_list (long_long_integer_type_node,
23960 V2SI_type_node, NULL_TREE);
23961 tree v2si_ftype_v4hi
23962 = build_function_type_list (V2SI_type_node,
23963 V4HI_type_node, NULL_TREE);
23964 tree v4hi_ftype_v8qi
23965 = build_function_type_list (V4HI_type_node,
23966 V8QI_type_node, NULL_TREE);
23967 tree v8qi_ftype_v8qi
23968 = build_function_type_list (V8QI_type_node,
23969 V8QI_type_node, NULL_TREE);
23970 tree v4hi_ftype_v4hi
23971 = build_function_type_list (V4HI_type_node,
23972 V4HI_type_node, NULL_TREE);
23973 tree v2si_ftype_v2si
23974 = build_function_type_list (V2SI_type_node,
23975 V2SI_type_node, NULL_TREE);
23977 tree di_ftype_di_v4hi_v4hi
23978 = build_function_type_list (long_long_unsigned_type_node,
23979 long_long_unsigned_type_node,
23980 V4HI_type_node, V4HI_type_node,
23981 NULL_TREE);
23983 tree di_ftype_v4hi_v4hi
23984 = build_function_type_list (long_long_unsigned_type_node,
23985 V4HI_type_node, V4HI_type_node,
23986 NULL_TREE);
23988 tree v2si_ftype_v2si_v4hi_v4hi
23989 = build_function_type_list (V2SI_type_node,
23990 V2SI_type_node, V4HI_type_node,
23991 V4HI_type_node, NULL_TREE);
23993 tree v2si_ftype_v2si_v8qi_v8qi
23994 = build_function_type_list (V2SI_type_node,
23995 V2SI_type_node, V8QI_type_node,
23996 V8QI_type_node, NULL_TREE);
23998 tree di_ftype_di_v2si_v2si
23999 = build_function_type_list (long_long_unsigned_type_node,
24000 long_long_unsigned_type_node,
24001 V2SI_type_node, V2SI_type_node,
24002 NULL_TREE);
24004 tree di_ftype_di_di_int
24005 = build_function_type_list (long_long_unsigned_type_node,
24006 long_long_unsigned_type_node,
24007 long_long_unsigned_type_node,
24008 integer_type_node, NULL_TREE);
24010 tree void_ftype_int
24011 = build_function_type_list (void_type_node,
24012 integer_type_node, NULL_TREE);
24014 tree v8qi_ftype_char
24015 = build_function_type_list (V8QI_type_node,
24016 signed_char_type_node, NULL_TREE);
24018 tree v4hi_ftype_short
24019 = build_function_type_list (V4HI_type_node,
24020 short_integer_type_node, NULL_TREE);
24022 tree v2si_ftype_int
24023 = build_function_type_list (V2SI_type_node,
24024 integer_type_node, NULL_TREE);
24026 /* Normal vector binops. */
24027 tree v8qi_ftype_v8qi_v8qi
24028 = build_function_type_list (V8QI_type_node,
24029 V8QI_type_node, V8QI_type_node, NULL_TREE);
24030 tree v4hi_ftype_v4hi_v4hi
24031 = build_function_type_list (V4HI_type_node,
24032 V4HI_type_node, V4HI_type_node, NULL_TREE);
24033 tree v2si_ftype_v2si_v2si
24034 = build_function_type_list (V2SI_type_node,
24035 V2SI_type_node, V2SI_type_node, NULL_TREE);
24036 tree di_ftype_di_di
24037 = build_function_type_list (long_long_unsigned_type_node,
24038 long_long_unsigned_type_node,
24039 long_long_unsigned_type_node,
24040 NULL_TREE);
24042 /* Add all builtins that are more or less simple operations on two
24043 operands. */
24044 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24046 /* Use one of the operands; the target can have a different mode for
24047 mask-generating compares. */
24048 enum machine_mode mode;
24049 tree type;
24051 if (d->name == 0)
24052 continue;
24054 mode = insn_data[d->icode].operand[1].mode;
24056 switch (mode)
24058 case V8QImode:
24059 type = v8qi_ftype_v8qi_v8qi;
24060 break;
24061 case V4HImode:
24062 type = v4hi_ftype_v4hi_v4hi;
24063 break;
24064 case V2SImode:
24065 type = v2si_ftype_v2si_v2si;
24066 break;
24067 case DImode:
24068 type = di_ftype_di_di;
24069 break;
24071 default:
24072 gcc_unreachable ();
24075 def_mbuiltin (d->mask, d->name, type, d->code);
24078 /* Add the remaining MMX insns with somewhat more complicated types. */
24079 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24080 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24081 ARM_BUILTIN_ ## CODE)
24083 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24084 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24085 ARM_BUILTIN_ ## CODE)
24087 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24088 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24089 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24090 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24091 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24092 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24093 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24094 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24095 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24097 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24098 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24099 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24100 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24101 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24102 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24104 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24105 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24106 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24107 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24108 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24109 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24111 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24112 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24113 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24114 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24115 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24116 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24118 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24119 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24120 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24121 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24122 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24123 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24125 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24127 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24128 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24129 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24130 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24131 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24132 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24133 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24134 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24135 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24136 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24138 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24139 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24140 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24141 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24142 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24143 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24144 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24145 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24146 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24148 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24149 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24150 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24152 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24153 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24154 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24156 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24157 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24159 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24160 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24161 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24162 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24163 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24164 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24166 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24167 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24168 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24169 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24170 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24171 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24172 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24173 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24174 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24175 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24176 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24177 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24179 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24180 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24181 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24182 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24184 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24185 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24186 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24187 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24188 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24189 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24190 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24192 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24193 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24194 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24196 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24197 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24198 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24199 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24201 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24202 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24203 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24204 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24206 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24207 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24208 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24209 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24211 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24212 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24213 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24214 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24216 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24217 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24218 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24219 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24221 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24222 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24223 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24224 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24226 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24228 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24229 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24230 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24232 #undef iwmmx_mbuiltin
24233 #undef iwmmx2_mbuiltin
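/* A minimal user-level sketch (hypothetical example, not part of this file):
   with -mcpu=iwmmxt the builtins registered above are directly callable, e.g.

     unsigned long long a, b, c;
     c = __builtin_arm_wand (a, b);   /+ 64-bit bitwise AND in a wRn register +/

   matching the di_ftype_di_di signature given to the "wand" entry in
   bdesc_2arg.  */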
24236 static void
24237 arm_init_fp16_builtins (void)
24239 tree fp16_type = make_node (REAL_TYPE);
24240 TYPE_PRECISION (fp16_type) = 16;
24241 layout_type (fp16_type);
24242 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
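/* Once registered, the __fp16 keyword becomes available to the C front ends
   whenever an -mfp16-format is selected.  A hedged sketch:

     __fp16 h = 1.0;         stored as a 16-bit IEEE or alternative value
     float  f = h + 2.0f;    arithmetic is performed after promotion to float

   See arm_promoted_type and arm_convert_to_type below for the promotion and
   conversion rules.  */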
24245 static void
24246 arm_init_builtins (void)
24248 if (TARGET_REALLY_IWMMXT)
24249 arm_init_iwmmxt_builtins ();
24251 if (TARGET_NEON)
24252 arm_init_neon_builtins ();
24254 if (arm_fp16_format)
24255 arm_init_fp16_builtins ();
24258 /* Return the ARM builtin for CODE. */
24260 static tree
24261 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24263 if (code >= ARM_BUILTIN_MAX)
24264 return error_mark_node;
24266 return arm_builtin_decls[code];
24269 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24271 static const char *
24272 arm_invalid_parameter_type (const_tree t)
24274 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24275 return N_("function parameters cannot have __fp16 type");
24276 return NULL;
24279 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24281 static const char *
24282 arm_invalid_return_type (const_tree t)
24284 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24285 return N_("functions cannot return __fp16 type");
24286 return NULL;
24289 /* Implement TARGET_PROMOTED_TYPE. */
24291 static tree
24292 arm_promoted_type (const_tree t)
24294 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24295 return float_type_node;
24296 return NULL_TREE;
24299 /* Implement TARGET_CONVERT_TO_TYPE.
24300 Specifically, this hook implements the peculiarity of the ARM
24301 half-precision floating-point C semantics that requires conversions between
24302 __fp16 to or from double to do an intermediate conversion to float. */
24304 static tree
24305 arm_convert_to_type (tree type, tree expr)
24307 tree fromtype = TREE_TYPE (expr);
24308 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24309 return NULL_TREE;
24310 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24311 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24312 return convert (type, convert (float_type_node, expr));
24313 return NULL_TREE;
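/* A worked example of the rule above: for "__fp16 h; double d = h;" the
   front end asks this hook for the conversion and receives
   (double) (float) h, i.e. the value is widened to float first and only
   then to double, as the ARM __fp16 semantics require.  The same two-step
   route is used in the narrowing direction.  */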
24316 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24317 This simply adds HFmode as a supported mode; even though we don't
24318 implement arithmetic on this type directly, it's supported by
24319 optabs conversions, much the way the double-word arithmetic is
24320 special-cased in the default hook. */
24322 static bool
24323 arm_scalar_mode_supported_p (enum machine_mode mode)
24325 if (mode == HFmode)
24326 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24327 else if (ALL_FIXED_POINT_MODE_P (mode))
24328 return true;
24329 else
24330 return default_scalar_mode_supported_p (mode);
24333 /* Errors in the source file can cause expand_expr to return const0_rtx
24334 where we expect a vector. To avoid crashing, use one of the vector
24335 clear instructions. */
24337 static rtx
24338 safe_vector_operand (rtx x, enum machine_mode mode)
24340 if (x != const0_rtx)
24341 return x;
24342 x = gen_reg_rtx (mode);
24344 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24345 : gen_rtx_SUBREG (DImode, x, 0)));
24346 return x;
24349 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24351 static rtx
24352 arm_expand_binop_builtin (enum insn_code icode,
24353 tree exp, rtx target)
24355 rtx pat;
24356 tree arg0 = CALL_EXPR_ARG (exp, 0);
24357 tree arg1 = CALL_EXPR_ARG (exp, 1);
24358 rtx op0 = expand_normal (arg0);
24359 rtx op1 = expand_normal (arg1);
24360 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24361 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24362 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24364 if (VECTOR_MODE_P (mode0))
24365 op0 = safe_vector_operand (op0, mode0);
24366 if (VECTOR_MODE_P (mode1))
24367 op1 = safe_vector_operand (op1, mode1);
24369 if (! target
24370 || GET_MODE (target) != tmode
24371 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24372 target = gen_reg_rtx (tmode);
24374 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24375 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24377 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24378 op0 = copy_to_mode_reg (mode0, op0);
24379 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24380 op1 = copy_to_mode_reg (mode1, op1);
24382 pat = GEN_FCN (icode) (target, op0, op1);
24383 if (! pat)
24384 return 0;
24385 emit_insn (pat);
24386 return target;
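/* Sketch of the flow: for a call such as __builtin_arm_waddb (a, b) the
   caller passes icode == CODE_FOR_addv8qi3; both operands are forced into
   V8QImode registers if the predicates reject them, a fresh V8QImode target
   is allocated when needed, and the single generated insn is emitted.  */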
24389 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24391 static rtx
24392 arm_expand_unop_builtin (enum insn_code icode,
24393 tree exp, rtx target, int do_load)
24395 rtx pat;
24396 tree arg0 = CALL_EXPR_ARG (exp, 0);
24397 rtx op0 = expand_normal (arg0);
24398 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24399 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24401 if (! target
24402 || GET_MODE (target) != tmode
24403 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24404 target = gen_reg_rtx (tmode);
24405 if (do_load)
24406 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
24407 else
24409 if (VECTOR_MODE_P (mode0))
24410 op0 = safe_vector_operand (op0, mode0);
24412 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24413 op0 = copy_to_mode_reg (mode0, op0);
24416 pat = GEN_FCN (icode) (target, op0);
24417 if (! pat)
24418 return 0;
24419 emit_insn (pat);
24420 return target;
24423 typedef enum {
24424 NEON_ARG_COPY_TO_REG,
24425 NEON_ARG_CONSTANT,
24426 NEON_ARG_MEMORY,
24427 NEON_ARG_STOP
24428 } builtin_arg;
24430 #define NEON_MAX_BUILTIN_ARGS 5
24432 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
24433 and return an expression for the accessed memory.
24435 The intrinsic function operates on a block of registers that has
24436 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
24437 function references the memory at EXP of type TYPE and in mode
24438 MEM_MODE; this mode may be BLKmode if no more suitable mode is
24439 available. */
24441 static tree
24442 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
24443 enum machine_mode reg_mode,
24444 neon_builtin_type_mode type_mode)
24446 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
24447 tree elem_type, upper_bound, array_type;
24449 /* Work out the size of the register block in bytes. */
24450 reg_size = GET_MODE_SIZE (reg_mode);
24452 /* Work out the size of each vector in bytes. */
24453 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
24454 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
24456 /* Work out how many vectors there are. */
24457 gcc_assert (reg_size % vector_size == 0);
24458 nvectors = reg_size / vector_size;
24460 /* Work out the type of each element. */
24461 gcc_assert (POINTER_TYPE_P (type));
24462 elem_type = TREE_TYPE (type);
24464 /* Work out how many elements are being loaded or stored.
24465 MEM_MODE == REG_MODE implies a one-to-one mapping between register
24466 and memory elements; anything else implies a lane load or store. */
24467 if (mem_mode == reg_mode)
24468 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
24469 else
24470 nelems = nvectors;
24472 /* Create a type that describes the full access. */
24473 upper_bound = build_int_cst (size_type_node, nelems - 1);
24474 array_type = build_array_type (elem_type, build_index_type (upper_bound));
24476 /* Dereference EXP using that type. */
24477 return fold_build2 (MEM_REF, array_type, exp,
24478 build_int_cst (build_pointer_type (array_type), 0));
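/* A worked example (intrinsic names are from arm_neon.h and given only for
   illustration): for a vld2q_u8-style load the register block is OImode
   (32 bytes) and TYPE_MODE is a quadword mode, so vector_size is 16 and
   nvectors is 2; MEM_MODE equals REG_MODE for a full structure load, giving
   nelems = 32 and an access type of uint8_t[32] built from the pointed-to
   element type.  */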
24481 /* Expand a Neon builtin. */
24482 static rtx
24483 arm_expand_neon_args (rtx target, int icode, int have_retval,
24484 neon_builtin_type_mode type_mode,
24485 tree exp, int fcode, ...)
24487 va_list ap;
24488 rtx pat;
24489 tree arg[NEON_MAX_BUILTIN_ARGS];
24490 rtx op[NEON_MAX_BUILTIN_ARGS];
24491 tree arg_type;
24492 tree formals;
24493 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24494 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
24495 enum machine_mode other_mode;
24496 int argc = 0;
24497 int opno;
24499 if (have_retval
24500 && (!target
24501 || GET_MODE (target) != tmode
24502 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
24503 target = gen_reg_rtx (tmode);
24505 va_start (ap, fcode);
24507 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
24509 for (;;)
24511 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
24513 if (thisarg == NEON_ARG_STOP)
24514 break;
24515 else
24517 opno = argc + have_retval;
24518 mode[argc] = insn_data[icode].operand[opno].mode;
24519 arg[argc] = CALL_EXPR_ARG (exp, argc);
24520 arg_type = TREE_VALUE (formals);
24521 if (thisarg == NEON_ARG_MEMORY)
24523 other_mode = insn_data[icode].operand[1 - opno].mode;
24524 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
24525 mode[argc], other_mode,
24526 type_mode);
24529 op[argc] = expand_normal (arg[argc]);
24531 switch (thisarg)
24533 case NEON_ARG_COPY_TO_REG:
24534 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
24535 if (!(*insn_data[icode].operand[opno].predicate)
24536 (op[argc], mode[argc]))
24537 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
24538 break;
24540 case NEON_ARG_CONSTANT:
24541 /* FIXME: This error message is somewhat unhelpful. */
24542 if (!(*insn_data[icode].operand[opno].predicate)
24543 (op[argc], mode[argc]))
24544 error ("argument must be a constant");
24545 break;
24547 case NEON_ARG_MEMORY:
24548 gcc_assert (MEM_P (op[argc]));
24549 PUT_MODE (op[argc], mode[argc]);
24550 /* ??? arm_neon.h uses the same built-in functions for signed
24551 and unsigned accesses, casting where necessary. This isn't
24552 alias safe. */
24553 set_mem_alias_set (op[argc], 0);
24554 if (!(*insn_data[icode].operand[opno].predicate)
24555 (op[argc], mode[argc]))
24556 op[argc] = (replace_equiv_address
24557 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
24558 break;
24560 case NEON_ARG_STOP:
24561 gcc_unreachable ();
24564 argc++;
24565 formals = TREE_CHAIN (formals);
24569 va_end (ap);
24571 if (have_retval)
24572 switch (argc)
24574 case 1:
24575 pat = GEN_FCN (icode) (target, op[0]);
24576 break;
24578 case 2:
24579 pat = GEN_FCN (icode) (target, op[0], op[1]);
24580 break;
24582 case 3:
24583 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
24584 break;
24586 case 4:
24587 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
24588 break;
24590 case 5:
24591 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
24592 break;
24594 default:
24595 gcc_unreachable ();
24597 else
24598 switch (argc)
24600 case 1:
24601 pat = GEN_FCN (icode) (op[0]);
24602 break;
24604 case 2:
24605 pat = GEN_FCN (icode) (op[0], op[1]);
24606 break;
24608 case 3:
24609 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
24610 break;
24612 case 4:
24613 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
24614 break;
24616 case 5:
24617 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
24618 break;
24620 default:
24621 gcc_unreachable ();
24624 if (!pat)
24625 return 0;
24627 emit_insn (pat);
24629 return target;
24632 /* Expand a Neon builtin. These are "special" because they don't have symbolic
24633 constants defined per-instruction or per instruction-variant. Instead, the
24634 required info is looked up in the table neon_builtin_data. */
24635 static rtx
24636 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
24638 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
24639 neon_itype itype = d->itype;
24640 enum insn_code icode = d->code;
24641 neon_builtin_type_mode type_mode = d->mode;
24643 switch (itype)
24645 case NEON_UNOP:
24646 case NEON_CONVERT:
24647 case NEON_DUPLANE:
24648 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24649 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
24651 case NEON_BINOP:
24652 case NEON_SETLANE:
24653 case NEON_SCALARMUL:
24654 case NEON_SCALARMULL:
24655 case NEON_SCALARMULH:
24656 case NEON_SHIFTINSERT:
24657 case NEON_LOGICBINOP:
24658 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24659 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24660 NEON_ARG_STOP);
24662 case NEON_TERNOP:
24663 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24664 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24665 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24667 case NEON_GETLANE:
24668 case NEON_FIXCONV:
24669 case NEON_SHIFTIMM:
24670 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24671 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
24672 NEON_ARG_STOP);
24674 case NEON_CREATE:
24675 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24676 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24678 case NEON_DUP:
24679 case NEON_RINT:
24680 case NEON_SPLIT:
24681 case NEON_FLOAT_WIDEN:
24682 case NEON_FLOAT_NARROW:
24683 case NEON_REINTERP:
24684 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24685 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24687 case NEON_COMBINE:
24688 case NEON_VTBL:
24689 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24690 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24692 case NEON_RESULTPAIR:
24693 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
24694 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24695 NEON_ARG_STOP);
24697 case NEON_LANEMUL:
24698 case NEON_LANEMULL:
24699 case NEON_LANEMULH:
24700 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24701 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24702 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24704 case NEON_LANEMAC:
24705 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24706 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24707 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
24709 case NEON_SHIFTACC:
24710 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24711 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24712 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24714 case NEON_SCALARMAC:
24715 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24716 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24717 NEON_ARG_CONSTANT, NEON_ARG_STOP);
24719 case NEON_SELECT:
24720 case NEON_VTBX:
24721 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24722 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
24723 NEON_ARG_STOP);
24725 case NEON_LOAD1:
24726 case NEON_LOADSTRUCT:
24727 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24728 NEON_ARG_MEMORY, NEON_ARG_STOP);
24730 case NEON_LOAD1LANE:
24731 case NEON_LOADSTRUCTLANE:
24732 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
24733 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24734 NEON_ARG_STOP);
24736 case NEON_STORE1:
24737 case NEON_STORESTRUCT:
24738 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
24739 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
24741 case NEON_STORE1LANE:
24742 case NEON_STORESTRUCTLANE:
24743 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
24744 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
24745 NEON_ARG_STOP);
24748 gcc_unreachable ();
24751 /* Emit code to reinterpret one Neon type as another, without altering bits. */
24752 void
24753 neon_reinterpret (rtx dest, rtx src)
24755 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
24758 /* Emit code to place a Neon pair result in memory locations (with equal
24759 registers). */
24760 void
24761 neon_emit_pair_result_insn (enum machine_mode mode,
24762 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
24763 rtx op1, rtx op2)
24765 rtx mem = gen_rtx_MEM (mode, destaddr);
24766 rtx tmp1 = gen_reg_rtx (mode);
24767 rtx tmp2 = gen_reg_rtx (mode);
24769 emit_insn (intfn (tmp1, op1, op2, tmp2));
24771 emit_move_insn (mem, tmp1);
24772 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
24773 emit_move_insn (mem, tmp2);
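/* For instance, a vtrn/vzip-style expansion on V4HImode would place the two
   interleaved halves in tmp1 and tmp2 and store them at destaddr and
   destaddr + 8 (GET_MODE_SIZE of the doubleword mode).  This is an
   illustrative sketch of the caller's contract, not an additional code
   path.  */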
24776 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24777 not to early-clobber SRC registers in the process.
24779 We assume that the operands described by SRC and DEST represent a
24780 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
24781 number of components into which the copy has been decomposed. */
24782 void
24783 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
24785 unsigned int i;
24787 if (!reg_overlap_mentioned_p (operands[0], operands[1])
24788 || REGNO (operands[0]) < REGNO (operands[1]))
24790 for (i = 0; i < count; i++)
24792 operands[2 * i] = dest[i];
24793 operands[2 * i + 1] = src[i];
24796 else
24798 for (i = 0; i < count; i++)
24800 operands[2 * i] = dest[count - i - 1];
24801 operands[2 * i + 1] = src[count - i - 1];
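/* Worked example (architectural D registers, purely illustrative): copying
   the block {d1,d2} into {d2,d3} overlaps and the destination starts at a
   higher register number, so the pairs are emitted in reverse order, first
   d3 <- d2 and then d2 <- d1, ensuring the overlapping source register is
   read before it is overwritten.  */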
24806 /* Split operands into moves from op[1] + op[2] into op[0]. */
24808 void
24809 neon_split_vcombine (rtx operands[3])
24811 unsigned int dest = REGNO (operands[0]);
24812 unsigned int src1 = REGNO (operands[1]);
24813 unsigned int src2 = REGNO (operands[2]);
24814 enum machine_mode halfmode = GET_MODE (operands[1]);
24815 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
24816 rtx destlo, desthi;
24818 if (src1 == dest && src2 == dest + halfregs)
24820 /* No-op move. Can't split to nothing; emit something. */
24821 emit_note (NOTE_INSN_DELETED);
24822 return;
24825 /* Preserve register attributes for variable tracking. */
24826 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24827 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24828 GET_MODE_SIZE (halfmode));
24830 /* Special case of reversed high/low parts. Use VSWP. */
24831 if (src2 == dest && src1 == dest + halfregs)
24833 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
24834 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
24835 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24836 return;
24839 if (!reg_overlap_mentioned_p (operands[2], destlo))
24841 /* Try to avoid unnecessary moves if part of the result
24842 is in the right place already. */
24843 if (src1 != dest)
24844 emit_move_insn (destlo, operands[1]);
24845 if (src2 != dest + halfregs)
24846 emit_move_insn (desthi, operands[2]);
24848 else
24850 if (src2 != dest + halfregs)
24851 emit_move_insn (desthi, operands[2]);
24852 if (src1 != dest)
24853 emit_move_insn (destlo, operands[1]);
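/* Illustrative cases (architectural registers, for exposition only): for
   "q0 = vcombine (d0, d2)" the low half is already in place, so only the
   high-half move d1 <- d2 is emitted; for "q0 = vcombine (d1, d0)" the
   halves are exactly swapped and a single two-set PARALLEL (a VSWP-style
   exchange) is emitted instead.  */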
24857 /* Expand an expression EXP that calls a built-in function,
24858 with result going to TARGET if that's convenient
24859 (and in mode MODE if that's convenient).
24860 SUBTARGET may be used as the target for computing one of EXP's operands.
24861 IGNORE is nonzero if the value is to be ignored. */
24863 static rtx
24864 arm_expand_builtin (tree exp,
24865 rtx target,
24866 rtx subtarget ATTRIBUTE_UNUSED,
24867 enum machine_mode mode ATTRIBUTE_UNUSED,
24868 int ignore ATTRIBUTE_UNUSED)
24870 const struct builtin_description * d;
24871 enum insn_code icode;
24872 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24873 tree arg0;
24874 tree arg1;
24875 tree arg2;
24876 rtx op0;
24877 rtx op1;
24878 rtx op2;
24879 rtx pat;
24880 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24881 size_t i;
24882 enum machine_mode tmode;
24883 enum machine_mode mode0;
24884 enum machine_mode mode1;
24885 enum machine_mode mode2;
24886 int opint;
24887 int selector;
24888 int mask;
24889 int imm;
24891 if (fcode >= ARM_BUILTIN_NEON_BASE)
24892 return arm_expand_neon_builtin (fcode, exp, target);
24894 switch (fcode)
24896 case ARM_BUILTIN_TEXTRMSB:
24897 case ARM_BUILTIN_TEXTRMUB:
24898 case ARM_BUILTIN_TEXTRMSH:
24899 case ARM_BUILTIN_TEXTRMUH:
24900 case ARM_BUILTIN_TEXTRMSW:
24901 case ARM_BUILTIN_TEXTRMUW:
24902 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
24903 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
24904 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
24905 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
24906 : CODE_FOR_iwmmxt_textrmw);
24908 arg0 = CALL_EXPR_ARG (exp, 0);
24909 arg1 = CALL_EXPR_ARG (exp, 1);
24910 op0 = expand_normal (arg0);
24911 op1 = expand_normal (arg1);
24912 tmode = insn_data[icode].operand[0].mode;
24913 mode0 = insn_data[icode].operand[1].mode;
24914 mode1 = insn_data[icode].operand[2].mode;
24916 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24917 op0 = copy_to_mode_reg (mode0, op0);
24918 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24920 /* @@@ better error message */
24921 error ("selector must be an immediate");
24922 return gen_reg_rtx (tmode);
24925 opint = INTVAL (op1);
24926 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
24928 if (opint > 7 || opint < 0)
24929 error ("the selector must be in the range 0 to 7");
24931 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
24933 if (opint > 3 || opint < 0)
24934 error ("the selector must be in the range 0 to 3");
24936 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
24938 if (opint > 1 || opint < 0)
24939 error ("the selector must be in the range 0 to 1");
24942 if (target == 0
24943 || GET_MODE (target) != tmode
24944 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24945 target = gen_reg_rtx (tmode);
24946 pat = GEN_FCN (icode) (target, op0, op1);
24947 if (! pat)
24948 return 0;
24949 emit_insn (pat);
24950 return target;
24952 case ARM_BUILTIN_WALIGNI:
24953 /* If op2 is immediate, call waligni, else call walignr. */
24954 arg0 = CALL_EXPR_ARG (exp, 0);
24955 arg1 = CALL_EXPR_ARG (exp, 1);
24956 arg2 = CALL_EXPR_ARG (exp, 2);
24957 op0 = expand_normal (arg0);
24958 op1 = expand_normal (arg1);
24959 op2 = expand_normal (arg2);
24960 if (CONST_INT_P (op2))
24962 icode = CODE_FOR_iwmmxt_waligni;
24963 tmode = insn_data[icode].operand[0].mode;
24964 mode0 = insn_data[icode].operand[1].mode;
24965 mode1 = insn_data[icode].operand[2].mode;
24966 mode2 = insn_data[icode].operand[3].mode;
24967 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24968 op0 = copy_to_mode_reg (mode0, op0);
24969 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24970 op1 = copy_to_mode_reg (mode1, op1);
24971 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
24972 selector = INTVAL (op2);
24973 if (selector > 7 || selector < 0)
24974 error ("the selector must be in the range 0 to 7");
24976 else
24978 icode = CODE_FOR_iwmmxt_walignr;
24979 tmode = insn_data[icode].operand[0].mode;
24980 mode0 = insn_data[icode].operand[1].mode;
24981 mode1 = insn_data[icode].operand[2].mode;
24982 mode2 = insn_data[icode].operand[3].mode;
24983 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24984 op0 = copy_to_mode_reg (mode0, op0);
24985 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24986 op1 = copy_to_mode_reg (mode1, op1);
24987 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
24988 op2 = copy_to_mode_reg (mode2, op2);
24990 if (target == 0
24991 || GET_MODE (target) != tmode
24992 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
24993 target = gen_reg_rtx (tmode);
24994 pat = GEN_FCN (icode) (target, op0, op1, op2);
24995 if (!pat)
24996 return 0;
24997 emit_insn (pat);
24998 return target;
25000 case ARM_BUILTIN_TINSRB:
25001 case ARM_BUILTIN_TINSRH:
25002 case ARM_BUILTIN_TINSRW:
25003 case ARM_BUILTIN_WMERGE:
25004 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25005 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25006 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25007 : CODE_FOR_iwmmxt_tinsrw);
25008 arg0 = CALL_EXPR_ARG (exp, 0);
25009 arg1 = CALL_EXPR_ARG (exp, 1);
25010 arg2 = CALL_EXPR_ARG (exp, 2);
25011 op0 = expand_normal (arg0);
25012 op1 = expand_normal (arg1);
25013 op2 = expand_normal (arg2);
25014 tmode = insn_data[icode].operand[0].mode;
25015 mode0 = insn_data[icode].operand[1].mode;
25016 mode1 = insn_data[icode].operand[2].mode;
25017 mode2 = insn_data[icode].operand[3].mode;
25019 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25020 op0 = copy_to_mode_reg (mode0, op0);
25021 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25022 op1 = copy_to_mode_reg (mode1, op1);
25023 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25025 error ("selector must be an immediate");
25026 return const0_rtx;
25028 if (icode == CODE_FOR_iwmmxt_wmerge)
25030 selector = INTVAL (op2);
25031 if (selector > 7 || selector < 0)
25032 error ("the selector must be in the range 0 to 7");
25034 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25035 || (icode == CODE_FOR_iwmmxt_tinsrh)
25036 || (icode == CODE_FOR_iwmmxt_tinsrw))
25038 mask = 0x01;
25039 selector = INTVAL (op2);
25040 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25041 error ("the selector must be in the range 0 to 7");
25042 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25043 error ("the selector must be in the range 0 to 3");
25044 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25045 error ("the selector must be in the range 0 to 1");
25046 mask <<= selector;
25047 op2 = GEN_INT (mask);
25049 if (target == 0
25050 || GET_MODE (target) != tmode
25051 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25052 target = gen_reg_rtx (tmode);
25053 pat = GEN_FCN (icode) (target, op0, op1, op2);
25054 if (! pat)
25055 return 0;
25056 emit_insn (pat);
25057 return target;
25059 case ARM_BUILTIN_SETWCGR0:
25060 case ARM_BUILTIN_SETWCGR1:
25061 case ARM_BUILTIN_SETWCGR2:
25062 case ARM_BUILTIN_SETWCGR3:
25063 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25064 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25065 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25066 : CODE_FOR_iwmmxt_setwcgr3);
25067 arg0 = CALL_EXPR_ARG (exp, 0);
25068 op0 = expand_normal (arg0);
25069 mode0 = insn_data[icode].operand[0].mode;
25070 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25071 op0 = copy_to_mode_reg (mode0, op0);
25072 pat = GEN_FCN (icode) (op0);
25073 if (!pat)
25074 return 0;
25075 emit_insn (pat);
25076 return 0;
25078 case ARM_BUILTIN_GETWCGR0:
25079 case ARM_BUILTIN_GETWCGR1:
25080 case ARM_BUILTIN_GETWCGR2:
25081 case ARM_BUILTIN_GETWCGR3:
25082 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25083 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25084 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25085 : CODE_FOR_iwmmxt_getwcgr3);
25086 tmode = insn_data[icode].operand[0].mode;
25087 if (target == 0
25088 || GET_MODE (target) != tmode
25089 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25090 target = gen_reg_rtx (tmode);
25091 pat = GEN_FCN (icode) (target);
25092 if (!pat)
25093 return 0;
25094 emit_insn (pat);
25095 return target;
25097 case ARM_BUILTIN_WSHUFH:
25098 icode = CODE_FOR_iwmmxt_wshufh;
25099 arg0 = CALL_EXPR_ARG (exp, 0);
25100 arg1 = CALL_EXPR_ARG (exp, 1);
25101 op0 = expand_normal (arg0);
25102 op1 = expand_normal (arg1);
25103 tmode = insn_data[icode].operand[0].mode;
25104 mode1 = insn_data[icode].operand[1].mode;
25105 mode2 = insn_data[icode].operand[2].mode;
25107 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25108 op0 = copy_to_mode_reg (mode1, op0);
25109 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25111 error ("mask must be an immediate");
25112 return const0_rtx;
25114 selector = INTVAL (op1);
25115 if (selector < 0 || selector > 255)
25116 error ("the mask must be in the range 0 to 255");
25117 if (target == 0
25118 || GET_MODE (target) != tmode
25119 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25120 target = gen_reg_rtx (tmode);
25121 pat = GEN_FCN (icode) (target, op0, op1);
25122 if (! pat)
25123 return 0;
25124 emit_insn (pat);
25125 return target;
25127 case ARM_BUILTIN_WMADDS:
25128 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25129 case ARM_BUILTIN_WMADDSX:
25130 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25131 case ARM_BUILTIN_WMADDSN:
25132 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25133 case ARM_BUILTIN_WMADDU:
25134 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25135 case ARM_BUILTIN_WMADDUX:
25136 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25137 case ARM_BUILTIN_WMADDUN:
25138 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25139 case ARM_BUILTIN_WSADBZ:
25140 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25141 case ARM_BUILTIN_WSADHZ:
25142 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25144 /* Several three-argument builtins. */
25145 case ARM_BUILTIN_WMACS:
25146 case ARM_BUILTIN_WMACU:
25147 case ARM_BUILTIN_TMIA:
25148 case ARM_BUILTIN_TMIAPH:
25149 case ARM_BUILTIN_TMIATT:
25150 case ARM_BUILTIN_TMIATB:
25151 case ARM_BUILTIN_TMIABT:
25152 case ARM_BUILTIN_TMIABB:
25153 case ARM_BUILTIN_WQMIABB:
25154 case ARM_BUILTIN_WQMIABT:
25155 case ARM_BUILTIN_WQMIATB:
25156 case ARM_BUILTIN_WQMIATT:
25157 case ARM_BUILTIN_WQMIABBN:
25158 case ARM_BUILTIN_WQMIABTN:
25159 case ARM_BUILTIN_WQMIATBN:
25160 case ARM_BUILTIN_WQMIATTN:
25161 case ARM_BUILTIN_WMIABB:
25162 case ARM_BUILTIN_WMIABT:
25163 case ARM_BUILTIN_WMIATB:
25164 case ARM_BUILTIN_WMIATT:
25165 case ARM_BUILTIN_WMIABBN:
25166 case ARM_BUILTIN_WMIABTN:
25167 case ARM_BUILTIN_WMIATBN:
25168 case ARM_BUILTIN_WMIATTN:
25169 case ARM_BUILTIN_WMIAWBB:
25170 case ARM_BUILTIN_WMIAWBT:
25171 case ARM_BUILTIN_WMIAWTB:
25172 case ARM_BUILTIN_WMIAWTT:
25173 case ARM_BUILTIN_WMIAWBBN:
25174 case ARM_BUILTIN_WMIAWBTN:
25175 case ARM_BUILTIN_WMIAWTBN:
25176 case ARM_BUILTIN_WMIAWTTN:
25177 case ARM_BUILTIN_WSADB:
25178 case ARM_BUILTIN_WSADH:
25179 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25180 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25181 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25182 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25183 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25184 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25185 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25186 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25187 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25188 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25189 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25190 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25191 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25192 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25193 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25194 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25195 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25196 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25197 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25198 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25199 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25200 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25201 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25202 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25203 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25204 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25205 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25206 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25207 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25208 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25209 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25210 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25211 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25212 : CODE_FOR_iwmmxt_wsadh);
25213 arg0 = CALL_EXPR_ARG (exp, 0);
25214 arg1 = CALL_EXPR_ARG (exp, 1);
25215 arg2 = CALL_EXPR_ARG (exp, 2);
25216 op0 = expand_normal (arg0);
25217 op1 = expand_normal (arg1);
25218 op2 = expand_normal (arg2);
25219 tmode = insn_data[icode].operand[0].mode;
25220 mode0 = insn_data[icode].operand[1].mode;
25221 mode1 = insn_data[icode].operand[2].mode;
25222 mode2 = insn_data[icode].operand[3].mode;
25224 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25225 op0 = copy_to_mode_reg (mode0, op0);
25226 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25227 op1 = copy_to_mode_reg (mode1, op1);
25228 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25229 op2 = copy_to_mode_reg (mode2, op2);
25230 if (target == 0
25231 || GET_MODE (target) != tmode
25232 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25233 target = gen_reg_rtx (tmode);
25234 pat = GEN_FCN (icode) (target, op0, op1, op2);
25235 if (! pat)
25236 return 0;
25237 emit_insn (pat);
25238 return target;
25240 case ARM_BUILTIN_WZERO:
25241 target = gen_reg_rtx (DImode);
25242 emit_insn (gen_iwmmxt_clrdi (target));
25243 return target;
25245 case ARM_BUILTIN_WSRLHI:
25246 case ARM_BUILTIN_WSRLWI:
25247 case ARM_BUILTIN_WSRLDI:
25248 case ARM_BUILTIN_WSLLHI:
25249 case ARM_BUILTIN_WSLLWI:
25250 case ARM_BUILTIN_WSLLDI:
25251 case ARM_BUILTIN_WSRAHI:
25252 case ARM_BUILTIN_WSRAWI:
25253 case ARM_BUILTIN_WSRADI:
25254 case ARM_BUILTIN_WRORHI:
25255 case ARM_BUILTIN_WRORWI:
25256 case ARM_BUILTIN_WRORDI:
25257 case ARM_BUILTIN_WSRLH:
25258 case ARM_BUILTIN_WSRLW:
25259 case ARM_BUILTIN_WSRLD:
25260 case ARM_BUILTIN_WSLLH:
25261 case ARM_BUILTIN_WSLLW:
25262 case ARM_BUILTIN_WSLLD:
25263 case ARM_BUILTIN_WSRAH:
25264 case ARM_BUILTIN_WSRAW:
25265 case ARM_BUILTIN_WSRAD:
25266 case ARM_BUILTIN_WRORH:
25267 case ARM_BUILTIN_WRORW:
25268 case ARM_BUILTIN_WRORD:
25269 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
25270 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
25271 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
25272 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
25273 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
25274 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
25275 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
25276 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
25277 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
25278 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
25279 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
25280 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
25281 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
25282 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
25283 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
25284 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
25285 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
25286 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25287 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25288 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25289 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25290 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25291 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25292 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25293 : CODE_FOR_nothing);
25294 arg1 = CALL_EXPR_ARG (exp, 1);
25295 op1 = expand_normal (arg1);
25296 if (GET_MODE (op1) == VOIDmode)
25298 imm = INTVAL (op1);
25299 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25300 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25301 && (imm < 0 || imm > 32))
25303 if (fcode == ARM_BUILTIN_WRORHI)
25304 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi16 in your code");
25305 else if (fcode == ARM_BUILTIN_WRORWI)
25306 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi32 in your code");
25307 else if (fcode == ARM_BUILTIN_WRORH)
25308 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi16 in your code");
25309 else
25310 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi32 in your code");
25312 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25313 && (imm < 0 || imm > 64))
25315 if (fcode == ARM_BUILTIN_WRORDI)
25316 error ("the count must be in the range 0 to 64; please check the intrinsic _mm_rori_si64 in your code");
25317 else
25318 error ("the count must be in the range 0 to 64; please check the intrinsic _mm_ror_si64 in your code");
25320 else if (imm < 0)
25322 if (fcode == ARM_BUILTIN_WSRLHI)
25323 error ("the count must be non-negative; please check the intrinsic _mm_srli_pi16 in your code");
25324 else if (fcode == ARM_BUILTIN_WSRLWI)
25325 error ("the count must be non-negative; please check the intrinsic _mm_srli_pi32 in your code");
25326 else if (fcode == ARM_BUILTIN_WSRLDI)
25327 error ("the count must be non-negative; please check the intrinsic _mm_srli_si64 in your code");
25328 else if (fcode == ARM_BUILTIN_WSLLHI)
25329 error ("the count must be non-negative; please check the intrinsic _mm_slli_pi16 in your code");
25330 else if (fcode == ARM_BUILTIN_WSLLWI)
25331 error ("the count must be non-negative; please check the intrinsic _mm_slli_pi32 in your code");
25332 else if (fcode == ARM_BUILTIN_WSLLDI)
25333 error ("the count must be non-negative; please check the intrinsic _mm_slli_si64 in your code");
25334 else if (fcode == ARM_BUILTIN_WSRAHI)
25335 error ("the count must be non-negative; please check the intrinsic _mm_srai_pi16 in your code");
25336 else if (fcode == ARM_BUILTIN_WSRAWI)
25337 error ("the count must be non-negative; please check the intrinsic _mm_srai_pi32 in your code");
25338 else if (fcode == ARM_BUILTIN_WSRADI)
25339 error ("the count must be non-negative; please check the intrinsic _mm_srai_si64 in your code");
25340 else if (fcode == ARM_BUILTIN_WSRLH)
25341 error ("the count must be non-negative; please check the intrinsic _mm_srl_pi16 in your code");
25342 else if (fcode == ARM_BUILTIN_WSRLW)
25343 error ("the count must be non-negative; please check the intrinsic _mm_srl_pi32 in your code");
25344 else if (fcode == ARM_BUILTIN_WSRLD)
25345 error ("the count must be non-negative; please check the intrinsic _mm_srl_si64 in your code");
25346 else if (fcode == ARM_BUILTIN_WSLLH)
25347 error ("the count must be non-negative; please check the intrinsic _mm_sll_pi16 in your code");
25348 else if (fcode == ARM_BUILTIN_WSLLW)
25349 error ("the count must be non-negative; please check the intrinsic _mm_sll_pi32 in your code");
25350 else if (fcode == ARM_BUILTIN_WSLLD)
25351 error ("the count must be non-negative; please check the intrinsic _mm_sll_si64 in your code");
25352 else if (fcode == ARM_BUILTIN_WSRAH)
25353 error ("the count must be non-negative; please check the intrinsic _mm_sra_pi16 in your code");
25354 else if (fcode == ARM_BUILTIN_WSRAW)
25355 error ("the count must be non-negative; please check the intrinsic _mm_sra_pi32 in your code");
25356 else
25357 error ("the count must be non-negative; please check the intrinsic _mm_sra_si64 in your code");
25360 return arm_expand_binop_builtin (icode, exp, target);
25362 default:
25363 break;
25366 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25367 if (d->code == (const enum arm_builtins) fcode)
25368 return arm_expand_binop_builtin (d->icode, exp, target);
25370 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
25371 if (d->code == (const enum arm_builtins) fcode)
25372 return arm_expand_unop_builtin (d->icode, exp, target, 0);
25374 /* @@@ Should really do something sensible here. */
25375 return NULL_RTX;
25378 /* Return the number (counting from 0) of
25379 the least significant set bit in MASK. */
25381 inline static int
25382 number_of_first_bit_set (unsigned mask)
25384 return ctz_hwi (mask);
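/* A couple of illustrative values (assumed, not from the original source):
   number_of_first_bit_set (0x1) == 0 and number_of_first_bit_set (0x28) == 3,
   since 0x28 is binary 101000 and its lowest set bit is bit 3.  Callers are
   expected to pass a non-zero mask.  */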
25387 /* Like emit_multi_reg_push, but allowing for a different set of
25388 registers to be described as saved. MASK is the set of registers
25389 to be saved; REAL_REGS is the set of registers to be described as
25390 saved. If REAL_REGS is 0, only describe the stack adjustment. */
25392 static rtx
25393 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25395 unsigned long regno;
25396 rtx par[10], tmp, reg, insn;
25397 int i, j;
25399 /* Build the parallel of the registers actually being stored. */
25400 for (i = 0; mask; ++i, mask &= mask - 1)
25402 regno = ctz_hwi (mask);
25403 reg = gen_rtx_REG (SImode, regno);
25405 if (i == 0)
25406 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25407 else
25408 tmp = gen_rtx_USE (VOIDmode, reg);
25410 par[i] = tmp;
25413 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25414 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25415 tmp = gen_frame_mem (BLKmode, tmp);
25416 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
25417 par[0] = tmp;
25419 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25420 insn = emit_insn (tmp);
25422 /* Always build the stack adjustment note for unwind info. */
25423 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25424 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
25425 par[0] = tmp;
25427 /* Build the parallel of the registers recorded as saved for unwind. */
25428 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25430 regno = ctz_hwi (real_regs);
25431 reg = gen_rtx_REG (SImode, regno);
25433 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25434 tmp = gen_frame_mem (SImode, tmp);
25435 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
25436 RTX_FRAME_RELATED_P (tmp) = 1;
25437 par[j + 1] = tmp;
25440 if (j == 0)
25441 tmp = par[0];
25442 else
25444 RTX_FRAME_RELATED_P (par[0]) = 1;
25445 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25448 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25450 return insn;
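/* Illustrative sketch of the effect (assumed example, not a real dump): with
   MASK = {r4, r5, lr} and REAL_REGS = {r8, r9, lr} the emitted insn is
   roughly
       push {r4, r5, lr}
   while the attached REG_FRAME_RELATED_EXPR note describes the three stack
   slots as holding r8, r9 and lr, so the unwinder records the registers that
   were really saved rather than the low-register temporaries used to push
   them.  */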
25453 /* Emit code to push or pop registers to or from the stack. F is the
25454 assembly file. MASK is the registers to pop. */
25455 static void
25456 thumb_pop (FILE *f, unsigned long mask)
25458 int regno;
25459 int lo_mask = mask & 0xFF;
25460 int pushed_words = 0;
25462 gcc_assert (mask);
25464 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25466 /* Special case. Do not generate a POP PC statement here, do it in
25467 thumb_exit() */
25468 thumb_exit (f, -1);
25469 return;
25472 fprintf (f, "\tpop\t{");
25474 /* Look at the low registers first. */
25475 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25477 if (lo_mask & 1)
25479 asm_fprintf (f, "%r", regno);
25481 if ((lo_mask & ~1) != 0)
25482 fprintf (f, ", ");
25484 pushed_words++;
25488 if (mask & (1 << PC_REGNUM))
25490 /* Catch popping the PC. */
25491 if (TARGET_INTERWORK || TARGET_BACKTRACE
25492 || crtl->calls_eh_return)
25494 /* The PC is never popped directly; instead
25495 it is popped into r3 and then BX is used. */
25496 fprintf (f, "}\n");
25498 thumb_exit (f, -1);
25500 return;
25502 else
25504 if (mask & 0xFF)
25505 fprintf (f, ", ");
25507 asm_fprintf (f, "%r", PC_REGNUM);
25511 fprintf (f, "}\n");
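/* Hypothetical example of the text written to F: for a MASK containing r4,
   r6 and the PC on a target without interworking, backtrace or
   __builtin_eh_return this prints
       pop {r4, r6, pc}
   whereas in the interworking/backtrace cases the PC is not popped directly
   and the function defers to thumb_exit instead.  */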
25514 /* Generate code to return from a thumb function.
25515 If 'reg_containing_return_addr' is -1, then the return address is
25516 actually on the stack, at the stack pointer. */
25517 static void
25518 thumb_exit (FILE *f, int reg_containing_return_addr)
25520 unsigned regs_available_for_popping;
25521 unsigned regs_to_pop;
25522 int pops_needed;
25523 unsigned available;
25524 unsigned required;
25525 int mode;
25526 int size;
25527 int restore_a4 = FALSE;
25529 /* Compute the registers we need to pop. */
25530 regs_to_pop = 0;
25531 pops_needed = 0;
25533 if (reg_containing_return_addr == -1)
25535 regs_to_pop |= 1 << LR_REGNUM;
25536 ++pops_needed;
25539 if (TARGET_BACKTRACE)
25541 /* Restore the (ARM) frame pointer and stack pointer. */
25542 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25543 pops_needed += 2;
25546 /* If there is nothing to pop then just emit the BX instruction and
25547 return. */
25548 if (pops_needed == 0)
25550 if (crtl->calls_eh_return)
25551 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25553 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25554 return;
25556 /* Otherwise if we are not supporting interworking and we have not created
25557 a backtrace structure and the function was not entered in ARM mode then
25558 just pop the return address straight into the PC. */
25559 else if (!TARGET_INTERWORK
25560 && !TARGET_BACKTRACE
25561 && !is_called_in_ARM_mode (current_function_decl)
25562 && !crtl->calls_eh_return)
25564 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25565 return;
25568 /* Find out how many of the (return) argument registers we can corrupt. */
25569 regs_available_for_popping = 0;
25571 /* If returning via __builtin_eh_return, the bottom three registers
25572 all contain information needed for the return. */
25573 if (crtl->calls_eh_return)
25574 size = 12;
25575 else
25577 /* We can deduce the registers used from the function's
25578 return value. This is more reliable than examining
25579 df_regs_ever_live_p () because that will be set if the register is
25580 ever used in the function, not just if the register is used
25581 to hold a return value. */
25583 if (crtl->return_rtx != 0)
25584 mode = GET_MODE (crtl->return_rtx);
25585 else
25586 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25588 size = GET_MODE_SIZE (mode);
25590 if (size == 0)
25592 /* In a void function we can use any argument register.
25593 In a function that returns a structure on the stack
25594 we can use the second and third argument registers. */
25595 if (mode == VOIDmode)
25596 regs_available_for_popping =
25597 (1 << ARG_REGISTER (1))
25598 | (1 << ARG_REGISTER (2))
25599 | (1 << ARG_REGISTER (3));
25600 else
25601 regs_available_for_popping =
25602 (1 << ARG_REGISTER (2))
25603 | (1 << ARG_REGISTER (3));
25605 else if (size <= 4)
25606 regs_available_for_popping =
25607 (1 << ARG_REGISTER (2))
25608 | (1 << ARG_REGISTER (3));
25609 else if (size <= 8)
25610 regs_available_for_popping =
25611 (1 << ARG_REGISTER (3));
25614 /* Match registers to be popped with registers into which we pop them. */
25615 for (available = regs_available_for_popping,
25616 required = regs_to_pop;
25617 required != 0 && available != 0;
25618 available &= ~(available & - available),
25619 required &= ~(required & - required))
25620 -- pops_needed;
25622 /* If we have any popping registers left over, remove them. */
25623 if (available > 0)
25624 regs_available_for_popping &= ~available;
25626 /* Otherwise if we need another popping register we can use
25627 the fourth argument register. */
25628 else if (pops_needed)
25630 /* If we have not found any free argument registers and
25631 reg a4 contains the return address, we must move it. */
25632 if (regs_available_for_popping == 0
25633 && reg_containing_return_addr == LAST_ARG_REGNUM)
25635 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25636 reg_containing_return_addr = LR_REGNUM;
25638 else if (size > 12)
25640 /* Register a4 is being used to hold part of the return value,
25641 but we have dire need of a free, low register. */
25642 restore_a4 = TRUE;
25644 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
25647 if (reg_containing_return_addr != LAST_ARG_REGNUM)
25649 /* The fourth argument register is available. */
25650 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
25652 --pops_needed;
25656 /* Pop as many registers as we can. */
25657 thumb_pop (f, regs_available_for_popping);
25659 /* Process the registers we popped. */
25660 if (reg_containing_return_addr == -1)
25662 /* The return address was popped into the lowest numbered register. */
25663 regs_to_pop &= ~(1 << LR_REGNUM);
25665 reg_containing_return_addr =
25666 number_of_first_bit_set (regs_available_for_popping);
25668 /* Remove this register from the mask of available registers, so that
25669 the return address will not be corrupted by further pops. */
25670 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
25673 /* If we popped other registers then handle them here. */
25674 if (regs_available_for_popping)
25676 int frame_pointer;
25678 /* Work out which register currently contains the frame pointer. */
25679 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
25681 /* Move it into the correct place. */
25682 asm_fprintf (f, "\tmov\t%r, %r\n",
25683 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
25685 /* (Temporarily) remove it from the mask of popped registers. */
25686 regs_available_for_popping &= ~(1 << frame_pointer);
25687 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
25689 if (regs_available_for_popping)
25691 int stack_pointer;
25693 /* We popped the stack pointer as well,
25694 find the register that contains it. */
25695 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
25697 /* Move it into the stack register. */
25698 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
25700 /* At this point we have popped all necessary registers, so
25701 do not worry about restoring regs_available_for_popping
25702 to its correct value:
25704 assert (pops_needed == 0)
25705 assert (regs_available_for_popping == (1 << frame_pointer))
25706 assert (regs_to_pop == (1 << STACK_POINTER)) */
25708 else
25710 /* Since we have just moved the popped value into the frame
25711 pointer, the popping register is available for reuse, and
25712 we know that we still have the stack pointer left to pop. */
25713 regs_available_for_popping |= (1 << frame_pointer);
25717 /* If we still have registers left on the stack, but we no longer have
25718 any registers into which we can pop them, then we must move the return
25719 address into the link register and make available the register that
25720 contained it. */
25721 if (regs_available_for_popping == 0 && pops_needed > 0)
25723 regs_available_for_popping |= 1 << reg_containing_return_addr;
25725 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
25726 reg_containing_return_addr);
25728 reg_containing_return_addr = LR_REGNUM;
25731 /* If we have registers left on the stack then pop some more.
25732 We know that at most we will want to pop FP and SP. */
25733 if (pops_needed > 0)
25735 int popped_into;
25736 int move_to;
25738 thumb_pop (f, regs_available_for_popping);
25740 /* We have popped either FP or SP.
25741 Move whichever one it is into the correct register. */
25742 popped_into = number_of_first_bit_set (regs_available_for_popping);
25743 move_to = number_of_first_bit_set (regs_to_pop);
25745 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
25747 regs_to_pop &= ~(1 << move_to);
25749 --pops_needed;
25752 /* If we still have not popped everything then we must have only
25753 had one register available to us and we are now popping the SP. */
25754 if (pops_needed > 0)
25756 int popped_into;
25758 thumb_pop (f, regs_available_for_popping);
25760 popped_into = number_of_first_bit_set (regs_available_for_popping);
25762 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
25764 /* assert (regs_to_pop == (1 << STACK_POINTER))
25765 assert (pops_needed == 1) */
25769 /* If necessary restore the a4 register. */
25770 if (restore_a4)
25772 if (reg_containing_return_addr != LR_REGNUM)
25774 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25775 reg_containing_return_addr = LR_REGNUM;
25778 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
25781 if (crtl->calls_eh_return)
25782 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25784 /* Return to caller. */
25785 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
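/* Illustrative shape of the output for the common case where the return
   address is still on the stack (an assumed example, not a compiler dump):
       pop {rN}       @ rN = lowest argument register free for popping
       bx  rN
   With TARGET_BACKTRACE the saved frame pointer and stack pointer are popped
   first and moved back into place with additional mov insns.  */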
25788 /* Scan INSN just before assembler is output for it.
25789 For Thumb-1, we track the status of the condition codes; this
25790 information is used in the cbranchsi4_insn pattern. */
25791 void
25792 thumb1_final_prescan_insn (rtx insn)
25794 if (flag_print_asm_name)
25795 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
25796 INSN_ADDRESSES (INSN_UID (insn)));
25797 /* Don't overwrite the previous setter when we get to a cbranch. */
25798 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
25800 enum attr_conds conds;
25802 if (cfun->machine->thumb1_cc_insn)
25804 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
25805 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
25806 CC_STATUS_INIT;
25808 conds = get_attr_conds (insn);
25809 if (conds == CONDS_SET)
25811 rtx set = single_set (insn);
25812 cfun->machine->thumb1_cc_insn = insn;
25813 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
25814 cfun->machine->thumb1_cc_op1 = const0_rtx;
25815 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
25816 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
25818 rtx src1 = XEXP (SET_SRC (set), 1);
25819 if (src1 == const0_rtx)
25820 cfun->machine->thumb1_cc_mode = CCmode;
25822 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
25824 /* Record the src register operand instead of dest because
25825 cprop_hardreg pass propagates src. */
25826 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
25829 else if (conds != CONDS_NOCOND)
25830 cfun->machine->thumb1_cc_insn = NULL_RTX;
25833 /* Check if unexpected far jump is used. */
25834 if (cfun->machine->lr_save_eliminated
25835 && get_attr_far_jump (insn) == FAR_JUMP_YES)
25836 internal_error ("unexpected thumb1 far jump");
25840 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
25842 unsigned HOST_WIDE_INT mask = 0xff;
25843 int i;
25845 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
25846 if (val == 0) /* XXX */
25847 return 0;
25849 for (i = 0; i < 25; i++)
25850 if ((val & (mask << i)) == val)
25851 return 1;
25853 return 0;
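/* Illustrative values (assumed): 0x000ff000 is accepted because it is
   0xff << 12, i.e. all of its set bits fit in one byte-wide window, whereas
   0x00000101 is rejected because its set bits span nine bit positions and no
   single shifted 0xff mask covers them.  */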
25856 /* Returns nonzero if the current function contains,
25857 or might contain a far jump. */
25858 static int
25859 thumb_far_jump_used_p (void)
25861 rtx insn;
25862 bool far_jump = false;
25863 unsigned int func_size = 0;
25865 /* This test is only important for leaf functions. */
25866 /* assert (!leaf_function_p ()); */
25868 /* If we have already decided that far jumps may be used,
25869 do not bother checking again, and always return true even if
25870 it turns out that they are not being used. Once we have made
25871 the decision that far jumps are present (and that hence the link
25872 register will be pushed onto the stack) we cannot go back on it. */
25873 if (cfun->machine->far_jump_used)
25874 return 1;
25876 /* If this function is not being called from the prologue/epilogue
25877 generation code then it must be being called from the
25878 INITIAL_ELIMINATION_OFFSET macro. */
25879 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
25881 /* In this case we know that we are being asked about the elimination
25882 of the arg pointer register. If that register is not being used,
25883 then there are no arguments on the stack, and we do not have to
25884 worry that a far jump might force the prologue to push the link
25885 register, changing the stack offsets. In this case we can just
25886 return false, since the presence of far jumps in the function will
25887 not affect stack offsets.
25889 If the arg pointer is live (or if it was live, but has now been
25890 eliminated and so set to dead) then we do have to test to see if
25891 the function might contain a far jump. This test can lead to some
25892 false negatives, since before reload is completed the length of
25893 branch instructions is not known, so gcc defaults to returning their
25894 longest length, which in turn sets the far jump attribute to true.
25896 A false negative will not result in bad code being generated, but it
25897 will result in a needless push and pop of the link register. We
25898 hope that this does not occur too often.
25900 If we need doubleword stack alignment this could affect the other
25901 elimination offsets so we can't risk getting it wrong. */
25902 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
25903 cfun->machine->arg_pointer_live = 1;
25904 else if (!cfun->machine->arg_pointer_live)
25905 return 0;
25908 /* Check to see if the function contains a branch
25909 insn with the far jump attribute set. */
25910 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25912 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
25914 far_jump = true;
25916 func_size += get_attr_length (insn);
25919 /* The far_jump attribute will always be true for thumb1 before the
25920 shorten_branch pass, so checking the far_jump attribute before
25921 shorten_branch isn't very useful.
25923 The following heuristic tries to estimate more accurately whether a far
25924 jump will actually be used. The heuristic is very conservative, as there
25925 is no chance to roll back a decision not to use far jumps.
25927 The thumb1 long branch offset range is -2048 to 2046. In the worst case
25928 each 2-byte insn is associated with a 4-byte constant pool entry. Using
25929 a function size of 2048/3 as the threshold is conservative enough. */
25930 if (far_jump)
25932 if ((func_size * 3) >= 2048)
25934 /* Record the fact that we have decided that
25935 the function does use far jumps. */
25936 cfun->machine->far_jump_used = 1;
25937 return 1;
25941 return 0;
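/* Worked example of the heuristic above (illustrative numbers): with
   func_size == 700 bytes, 700 * 3 == 2100 >= 2048, so far jumps are assumed
   to be needed and LR will be saved; with func_size == 600,
   600 * 3 == 1800 < 2048 and the far-jump attribute is ignored for now.  */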
25944 /* Return nonzero if FUNC must be entered in ARM mode. */
25946 is_called_in_ARM_mode (tree func)
25948 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
25950 /* Ignore the problem about functions whose address is taken. */
25951 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
25952 return TRUE;
25954 #ifdef ARM_PE
25955 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
25956 #else
25957 return FALSE;
25958 #endif
25961 /* Given the stack offsets and register mask in OFFSETS, decide how
25962 many additional registers to push instead of subtracting a constant
25963 from SP. For epilogues the principle is the same except we use pop.
25964 FOR_PROLOGUE indicates which we're generating. */
25965 static int
25966 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
25968 HOST_WIDE_INT amount;
25969 unsigned long live_regs_mask = offsets->saved_regs_mask;
25970 /* Extract a mask of the ones we can give to the Thumb's push/pop
25971 instruction. */
25972 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
25973 /* Then count how many other high registers will need to be pushed. */
25974 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25975 int n_free, reg_base, size;
25977 if (!for_prologue && frame_pointer_needed)
25978 amount = offsets->locals_base - offsets->saved_regs;
25979 else
25980 amount = offsets->outgoing_args - offsets->saved_regs;
25982 /* If the stack frame size is 512 exactly, we can save one load
25983 instruction, which should make this a win even when optimizing
25984 for speed. */
25985 if (!optimize_size && amount != 512)
25986 return 0;
25988 /* Can't do this if there are high registers to push. */
25989 if (high_regs_pushed != 0)
25990 return 0;
25992 /* Shouldn't do it in the prologue if no registers would normally
25993 be pushed at all. In the epilogue, also allow it if we'll have
25994 a pop insn for the PC. */
25995 if (l_mask == 0
25996 && (for_prologue
25997 || TARGET_BACKTRACE
25998 || (live_regs_mask & 1 << LR_REGNUM) == 0
25999 || TARGET_INTERWORK
26000 || crtl->args.pretend_args_size != 0))
26001 return 0;
26003 /* Don't do this if thumb_expand_prologue wants to emit instructions
26004 between the push and the stack frame allocation. */
26005 if (for_prologue
26006 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26007 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26008 return 0;
26010 reg_base = 0;
26011 n_free = 0;
26012 if (!for_prologue)
26014 size = arm_size_return_regs ();
26015 reg_base = ARM_NUM_INTS (size);
26016 live_regs_mask >>= reg_base;
26019 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26020 && (for_prologue || call_used_regs[reg_base + n_free]))
26022 live_regs_mask >>= 1;
26023 n_free++;
26026 if (n_free == 0)
26027 return 0;
26028 gcc_assert (amount / 4 * 4 == amount);
26030 if (amount >= 512 && (amount - n_free * 4) < 512)
26031 return (amount - 508) / 4;
26032 if (amount <= n_free * 4)
26033 return amount / 4;
26034 return 0;
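/* Worked examples (assumed values): when not optimizing for size this only
   triggers for amount == 512; with n_free == 1 it returns
   (512 - 508) / 4 == 1, i.e. one extra register is pushed so the remaining
   508-byte adjustment fits a single Thumb-1 "sub sp" instruction.  When
   optimizing for size, amount == 8 with n_free == 2 returns 8 / 4 == 2,
   absorbing the whole frame into the push/pop.  */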
26037 /* The bits which aren't usefully expanded as rtl. */
26038 const char *
26039 thumb1_unexpanded_epilogue (void)
26041 arm_stack_offsets *offsets;
26042 int regno;
26043 unsigned long live_regs_mask = 0;
26044 int high_regs_pushed = 0;
26045 int extra_pop;
26046 int had_to_push_lr;
26047 int size;
26049 if (cfun->machine->return_used_this_function != 0)
26050 return "";
26052 if (IS_NAKED (arm_current_func_type ()))
26053 return "";
26055 offsets = arm_get_frame_offsets ();
26056 live_regs_mask = offsets->saved_regs_mask;
26057 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26059 /* We can deduce the registers used from the function's return value.
26060 This is more reliable than examining df_regs_ever_live_p () because that
26061 will be set if the register is ever used in the function, not just if
26062 the register is used to hold a return value. */
26063 size = arm_size_return_regs ();
26065 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26066 if (extra_pop > 0)
26068 unsigned long extra_mask = (1 << extra_pop) - 1;
26069 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26072 /* The prolog may have pushed some high registers to use as
26073 work registers. e.g. the testsuite file:
26074 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26075 compiles to produce:
26076 push {r4, r5, r6, r7, lr}
26077 mov r7, r9
26078 mov r6, r8
26079 push {r6, r7}
26080 as part of the prolog. We have to undo that pushing here. */
26082 if (high_regs_pushed)
26084 unsigned long mask = live_regs_mask & 0xff;
26085 int next_hi_reg;
26087 /* The available low registers depend on the size of the value we are
26088 returning. */
26089 if (size <= 12)
26090 mask |= 1 << 3;
26091 if (size <= 8)
26092 mask |= 1 << 2;
26094 if (mask == 0)
26095 /* Oh dear! We have no low registers into which we can pop
26096 high registers! */
26097 internal_error
26098 ("no low registers available for popping high registers");
26100 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26101 if (live_regs_mask & (1 << next_hi_reg))
26102 break;
26104 while (high_regs_pushed)
26106 /* Find lo register(s) into which the high register(s) can
26107 be popped. */
26108 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26110 if (mask & (1 << regno))
26111 high_regs_pushed--;
26112 if (high_regs_pushed == 0)
26113 break;
26116 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26118 /* Pop the values into the low register(s). */
26119 thumb_pop (asm_out_file, mask);
26121 /* Move the value(s) into the high registers. */
26122 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26124 if (mask & (1 << regno))
26126 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26127 regno);
26129 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26130 if (live_regs_mask & (1 << next_hi_reg))
26131 break;
26135 live_regs_mask &= ~0x0f00;
26138 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26139 live_regs_mask &= 0xff;
26141 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26143 /* Pop the return address into the PC. */
26144 if (had_to_push_lr)
26145 live_regs_mask |= 1 << PC_REGNUM;
26147 /* Either no argument registers were pushed or a backtrace
26148 structure was created which includes an adjusted stack
26149 pointer, so just pop everything. */
26150 if (live_regs_mask)
26151 thumb_pop (asm_out_file, live_regs_mask);
26153 /* We have either just popped the return address into the
26154 PC or it was kept in LR for the entire function.
26155 Note that thumb_pop has already called thumb_exit if the
26156 PC was in the list. */
26157 if (!had_to_push_lr)
26158 thumb_exit (asm_out_file, LR_REGNUM);
26160 else
26162 /* Pop everything but the return address. */
26163 if (live_regs_mask)
26164 thumb_pop (asm_out_file, live_regs_mask);
26166 if (had_to_push_lr)
26168 if (size > 12)
26170 /* We have no free low regs, so save one. */
26171 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26172 LAST_ARG_REGNUM);
26175 /* Get the return address into a temporary register. */
26176 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26178 if (size > 12)
26180 /* Move the return address to lr. */
26181 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26182 LAST_ARG_REGNUM);
26183 /* Restore the low register. */
26184 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26185 IP_REGNUM);
26186 regno = LR_REGNUM;
26188 else
26189 regno = LAST_ARG_REGNUM;
26191 else
26192 regno = LR_REGNUM;
26194 /* Remove the argument registers that were pushed onto the stack. */
26195 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26196 SP_REGNUM, SP_REGNUM,
26197 crtl->args.pretend_args_size);
26199 thumb_exit (asm_out_file, regno);
26202 return "";
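/* For the prologue example quoted in the comment above (push {r4-r7, lr};
   mov r7, r9; mov r6, r8; push {r6, r7}) the code emitted here would be
   roughly
       pop {r2, r3}
       mov r8, r2
       mov r9, r3
       pop {r4, r5, r6, r7, pc}
   This is an illustrative reconstruction, not an actual dump; the low
   registers chosen depend on the size of the return value.  */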
26205 /* Functions to save and restore machine-specific function data. */
26206 static struct machine_function *
26207 arm_init_machine_status (void)
26209 struct machine_function *machine;
26210 machine = ggc_alloc_cleared_machine_function ();
26212 #if ARM_FT_UNKNOWN != 0
26213 machine->func_type = ARM_FT_UNKNOWN;
26214 #endif
26215 return machine;
26218 /* Return an RTX indicating where the return address to the
26219 calling function can be found. */
26221 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26223 if (count != 0)
26224 return NULL_RTX;
26226 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26229 /* Do anything needed before RTL is emitted for each function. */
26230 void
26231 arm_init_expanders (void)
26233 /* Arrange to initialize and mark the machine per-function status. */
26234 init_machine_status = arm_init_machine_status;
26236 /* This is to stop the combine pass optimizing away the alignment
26237 adjustment of va_arg. */
26238 /* ??? It is claimed that this should not be necessary. */
26239 if (cfun)
26240 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26244 /* Like arm_compute_initial_elimination offset. Simpler because there
26245 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26246 to point at the base of the local variables after static stack
26247 space for a function has been allocated. */
26249 HOST_WIDE_INT
26250 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26252 arm_stack_offsets *offsets;
26254 offsets = arm_get_frame_offsets ();
26256 switch (from)
26258 case ARG_POINTER_REGNUM:
26259 switch (to)
26261 case STACK_POINTER_REGNUM:
26262 return offsets->outgoing_args - offsets->saved_args;
26264 case FRAME_POINTER_REGNUM:
26265 return offsets->soft_frame - offsets->saved_args;
26267 case ARM_HARD_FRAME_POINTER_REGNUM:
26268 return offsets->saved_regs - offsets->saved_args;
26270 case THUMB_HARD_FRAME_POINTER_REGNUM:
26271 return offsets->locals_base - offsets->saved_args;
26273 default:
26274 gcc_unreachable ();
26276 break;
26278 case FRAME_POINTER_REGNUM:
26279 switch (to)
26281 case STACK_POINTER_REGNUM:
26282 return offsets->outgoing_args - offsets->soft_frame;
26284 case ARM_HARD_FRAME_POINTER_REGNUM:
26285 return offsets->saved_regs - offsets->soft_frame;
26287 case THUMB_HARD_FRAME_POINTER_REGNUM:
26288 return offsets->locals_base - offsets->soft_frame;
26290 default:
26291 gcc_unreachable ();
26293 break;
26295 default:
26296 gcc_unreachable ();
26300 /* Generate the function's prologue. */
26302 void
26303 thumb1_expand_prologue (void)
26305 rtx insn;
26307 HOST_WIDE_INT amount;
26308 arm_stack_offsets *offsets;
26309 unsigned long func_type;
26310 int regno;
26311 unsigned long live_regs_mask;
26312 unsigned long l_mask;
26313 unsigned high_regs_pushed = 0;
26315 func_type = arm_current_func_type ();
26317 /* Naked functions don't have prologues. */
26318 if (IS_NAKED (func_type))
26319 return;
26321 if (IS_INTERRUPT (func_type))
26323 error ("interrupt service routines cannot be coded in Thumb mode");
26324 return;
26327 if (is_called_in_ARM_mode (current_function_decl))
26328 emit_insn (gen_prologue_thumb1_interwork ());
26330 offsets = arm_get_frame_offsets ();
26331 live_regs_mask = offsets->saved_regs_mask;
26333 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26334 l_mask = live_regs_mask & 0x40ff;
26335 /* Then count how many other high registers will need to be pushed. */
26336 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26338 if (crtl->args.pretend_args_size)
26340 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26342 if (cfun->machine->uses_anonymous_args)
26344 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26345 unsigned long mask;
26347 mask = 1ul << (LAST_ARG_REGNUM + 1);
26348 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26350 insn = thumb1_emit_multi_reg_push (mask, 0);
26352 else
26354 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26355 stack_pointer_rtx, x));
26357 RTX_FRAME_RELATED_P (insn) = 1;
26360 if (TARGET_BACKTRACE)
26362 HOST_WIDE_INT offset = 0;
26363 unsigned work_register;
26364 rtx work_reg, x, arm_hfp_rtx;
26366 /* We have been asked to create a stack backtrace structure.
26367 The code looks like this:
26369 0 .align 2
26370 0 func:
26371 0 sub SP, #16 Reserve space for 4 registers.
26372 2 push {R7} Push low registers.
26373 4 add R7, SP, #20 Get the stack pointer before the push.
26374 6 str R7, [SP, #8] Store the stack pointer
26375 (before reserving the space).
26376 8 mov R7, PC Get hold of the start of this code + 12.
26377 10 str R7, [SP, #16] Store it.
26378 12 mov R7, FP Get hold of the current frame pointer.
26379 14 str R7, [SP, #4] Store it.
26380 16 mov R7, LR Get hold of the current return address.
26381 18 str R7, [SP, #12] Store it.
26382 20 add R7, SP, #16 Point at the start of the
26383 backtrace structure.
26384 22 mov FP, R7 Put this value into the frame pointer. */
26386 work_register = thumb_find_work_register (live_regs_mask);
26387 work_reg = gen_rtx_REG (SImode, work_register);
26388 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26390 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26391 stack_pointer_rtx, GEN_INT (-16)));
26392 RTX_FRAME_RELATED_P (insn) = 1;
26394 if (l_mask)
26396 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26397 RTX_FRAME_RELATED_P (insn) = 1;
26399 offset = bit_count (l_mask) * UNITS_PER_WORD;
26402 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26403 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26405 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26406 x = gen_frame_mem (SImode, x);
26407 emit_move_insn (x, work_reg);
26409 /* Make sure that the instruction fetching the PC is in the right place
26410 to calculate "start of backtrace creation code + 12". */
26411 /* ??? The stores using the common WORK_REG ought to be enough to
26412 prevent the scheduler from doing anything weird. Failing that
26413 we could always move all of the following into an UNSPEC_VOLATILE. */
26414 if (l_mask)
26416 x = gen_rtx_REG (SImode, PC_REGNUM);
26417 emit_move_insn (work_reg, x);
26419 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26420 x = gen_frame_mem (SImode, x);
26421 emit_move_insn (x, work_reg);
26423 emit_move_insn (work_reg, arm_hfp_rtx);
26425 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26426 x = gen_frame_mem (SImode, x);
26427 emit_move_insn (x, work_reg);
26429 else
26431 emit_move_insn (work_reg, arm_hfp_rtx);
26433 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26434 x = gen_frame_mem (SImode, x);
26435 emit_move_insn (x, work_reg);
26437 x = gen_rtx_REG (SImode, PC_REGNUM);
26438 emit_move_insn (work_reg, x);
26440 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26441 x = gen_frame_mem (SImode, x);
26442 emit_move_insn (x, work_reg);
26445 x = gen_rtx_REG (SImode, LR_REGNUM);
26446 emit_move_insn (work_reg, x);
26448 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26449 x = gen_frame_mem (SImode, x);
26450 emit_move_insn (x, work_reg);
26452 x = GEN_INT (offset + 12);
26453 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26455 emit_move_insn (arm_hfp_rtx, work_reg);
26457 /* Optimization: If we are not pushing any low registers but we are going
26458 to push some high registers then delay our first push. This will just
26459 be a push of LR and we can combine it with the push of the first high
26460 register. */
26461 else if ((l_mask & 0xff) != 0
26462 || (high_regs_pushed == 0 && l_mask))
26464 unsigned long mask = l_mask;
26465 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
26466 insn = thumb1_emit_multi_reg_push (mask, mask);
26467 RTX_FRAME_RELATED_P (insn) = 1;
26470 if (high_regs_pushed)
26472 unsigned pushable_regs;
26473 unsigned next_hi_reg;
26474 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26475 : crtl->args.info.nregs;
26476 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26478 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26479 if (live_regs_mask & (1 << next_hi_reg))
26480 break;
26482 /* Here we need to mask out registers used for passing arguments, even
26483 if they could be pushed, to avoid using them to stash the high
26484 registers; such a stash could clobber the argument values. */
26485 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
26487 if (pushable_regs == 0)
26488 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26490 while (high_regs_pushed > 0)
26492 unsigned long real_regs_mask = 0;
26494 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
26496 if (pushable_regs & (1 << regno))
26498 emit_move_insn (gen_rtx_REG (SImode, regno),
26499 gen_rtx_REG (SImode, next_hi_reg));
26501 high_regs_pushed --;
26502 real_regs_mask |= (1 << next_hi_reg);
26504 if (high_regs_pushed)
26506 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26507 next_hi_reg --)
26508 if (live_regs_mask & (1 << next_hi_reg))
26509 break;
26511 else
26513 pushable_regs &= ~((1 << regno) - 1);
26514 break;
26519 /* If we had to find a work register and we have not yet
26520 saved the LR then add it to the list of regs to push. */
26521 if (l_mask == (1 << LR_REGNUM))
26523 pushable_regs |= l_mask;
26524 real_regs_mask |= l_mask;
26525 l_mask = 0;
26528 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
26529 RTX_FRAME_RELATED_P (insn) = 1;
26533 /* Load the pic register before setting the frame pointer,
26534 so we can use r7 as a temporary work register. */
26535 if (flag_pic && arm_pic_register != INVALID_REGNUM)
26536 arm_load_pic_register (live_regs_mask);
26538 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26539 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26540 stack_pointer_rtx);
26542 if (flag_stack_usage_info)
26543 current_function_static_stack_size
26544 = offsets->outgoing_args - offsets->saved_args;
26546 amount = offsets->outgoing_args - offsets->saved_regs;
26547 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26548 if (amount)
26550 if (amount < 512)
26552 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26553 GEN_INT (- amount)));
26554 RTX_FRAME_RELATED_P (insn) = 1;
26556 else
26558 rtx reg, dwarf;
26560 /* The stack decrement is too big for an immediate value in a single
26561 insn. In theory we could issue multiple subtracts, but after
26562 three of them it becomes more space efficient to place the full
26563 value in the constant pool and load into a register. (Also the
26564 ARM debugger really likes to see only one stack decrement per
26565 function). So instead we look for a scratch register into which
26566 we can load the decrement, and then we subtract this from the
26567 stack pointer. Unfortunately on the thumb the only available
26568 scratch registers are the argument registers, and we cannot use
26569 these as they may hold arguments to the function. Instead we
26570 attempt to locate a call preserved register which is used by this
26571 function. If we can find one, then we know that it will have
26572 been pushed at the start of the prologue and so we can corrupt
26573 it now. */
26574 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26575 if (live_regs_mask & (1 << regno))
26576 break;
26578 gcc_assert (regno <= LAST_LO_REGNUM);
26580 reg = gen_rtx_REG (SImode, regno);
26582 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26584 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26585 stack_pointer_rtx, reg));
26587 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26588 plus_constant (Pmode, stack_pointer_rtx,
26589 -amount));
26590 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26591 RTX_FRAME_RELATED_P (insn) = 1;
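/* Illustrative sequence for a large frame (assumed amount == 1024, with r4
   live and therefore already pushed in the prologue):
       ldr  r4, .Lc       @ .Lc is a constant-pool entry holding -1024
       add  sp, sp, r4
   The REG_FRAME_RELATED_EXPR note still records the adjustment as
   sp = sp - 1024, so the unwind information remains accurate.  */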
26595 if (frame_pointer_needed)
26596 thumb_set_frame_pointer (offsets);
26598 /* If we are profiling, make sure no instructions are scheduled before
26599 the call to mcount. Similarly if the user has requested no
26600 scheduling in the prolog. Similarly if we want non-call exceptions
26601 using the EABI unwinder, to prevent faulting instructions from being
26602 swapped with a stack adjustment. */
26603 if (crtl->profile || !TARGET_SCHED_PROLOG
26604 || (arm_except_unwind_info (&global_options) == UI_TARGET
26605 && cfun->can_throw_non_call_exceptions))
26606 emit_insn (gen_blockage ());
26608 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26609 if (live_regs_mask & 0xff)
26610 cfun->machine->lr_save_eliminated = 0;
26613 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
26614 single POP instruction can be generated. LR should be replaced by PC.
26615 All the required checks are already done by USE_RETURN_INSN (). Hence,
26616 all we really need to check here is whether a single register or
26617 multiple registers are to be popped. */
26618 void
26619 thumb2_expand_return (bool simple_return)
26621 int i, num_regs;
26622 unsigned long saved_regs_mask;
26623 arm_stack_offsets *offsets;
26625 offsets = arm_get_frame_offsets ();
26626 saved_regs_mask = offsets->saved_regs_mask;
26628 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
26629 if (saved_regs_mask & (1 << i))
26630 num_regs++;
26632 if (!simple_return && saved_regs_mask)
26634 if (num_regs == 1)
26636 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26637 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
26638 rtx addr = gen_rtx_MEM (SImode,
26639 gen_rtx_POST_INC (SImode,
26640 stack_pointer_rtx));
26641 set_mem_alias_set (addr, get_frame_alias_set ());
26642 XVECEXP (par, 0, 0) = ret_rtx;
26643 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
26644 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
26645 emit_jump_insn (par);
26647 else
26649 saved_regs_mask &= ~ (1 << LR_REGNUM);
26650 saved_regs_mask |= (1 << PC_REGNUM);
26651 arm_emit_multi_reg_pop (saved_regs_mask);
26654 else
26656 emit_jump_insn (simple_return_rtx);
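/* Assumed examples of the resulting epilogues (not compiler dumps): with
   only LR saved, the single-register pop assembles to something like
   "ldr pc, [sp], #4"; with several saved registers it becomes
   "pop {r4, r5, r6, pc}"; the simple_return path emits a plain "bx lr".  */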
26660 void
26661 thumb1_expand_epilogue (void)
26663 HOST_WIDE_INT amount;
26664 arm_stack_offsets *offsets;
26665 int regno;
26667 /* Naked functions don't have epilogues. */
26668 if (IS_NAKED (arm_current_func_type ()))
26669 return;
26671 offsets = arm_get_frame_offsets ();
26672 amount = offsets->outgoing_args - offsets->saved_regs;
26674 if (frame_pointer_needed)
26676 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
26677 amount = offsets->locals_base - offsets->saved_regs;
26679 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
26681 gcc_assert (amount >= 0);
26682 if (amount)
26684 emit_insn (gen_blockage ());
26686 if (amount < 512)
26687 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26688 GEN_INT (amount)));
26689 else
26691 /* r3 is always free in the epilogue. */
26692 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
26694 emit_insn (gen_movsi (reg, GEN_INT (amount)));
26695 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
26699 /* Emit a USE (stack_pointer_rtx), so that
26700 the stack adjustment will not be deleted. */
26701 emit_insn (gen_force_register_use (stack_pointer_rtx));
26703 if (crtl->profile || !TARGET_SCHED_PROLOG)
26704 emit_insn (gen_blockage ());
26706 /* Emit a clobber for each insn that will be restored in the epilogue,
26707 so that flow2 will get register lifetimes correct. */
26708 for (regno = 0; regno < 13; regno++)
26709 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
26710 emit_clobber (gen_rtx_REG (SImode, regno));
26712 if (! df_regs_ever_live_p (LR_REGNUM))
26713 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
26716 /* Epilogue code for APCS frame. */
26717 static void
26718 arm_expand_epilogue_apcs_frame (bool really_return)
26720 unsigned long func_type;
26721 unsigned long saved_regs_mask;
26722 int num_regs = 0;
26723 int i;
26724 int floats_from_frame = 0;
26725 arm_stack_offsets *offsets;
26727 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
26728 func_type = arm_current_func_type ();
26730 /* Get frame offsets for ARM. */
26731 offsets = arm_get_frame_offsets ();
26732 saved_regs_mask = offsets->saved_regs_mask;
26734 /* Find the offset of the floating-point save area in the frame. */
26735 floats_from_frame = offsets->saved_args - offsets->frame;
26737 /* Compute how many core registers saved and how far away the floats are. */
26738 for (i = 0; i <= LAST_ARM_REGNUM; i++)
26739 if (saved_regs_mask & (1 << i))
26741 num_regs++;
26742 floats_from_frame += 4;
26745 if (TARGET_HARD_FLOAT && TARGET_VFP)
26747 int start_reg;
26749 /* The offset is from IP_REGNUM. */
26750 int saved_size = arm_get_vfp_saved_size ();
26751 if (saved_size > 0)
26753 floats_from_frame += saved_size;
26754 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
26755 hard_frame_pointer_rtx,
26756 GEN_INT (-floats_from_frame)));
26759 /* Generate VFP register multi-pop. */
26760 start_reg = FIRST_VFP_REGNUM;
26762 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
26763 /* Look for a case where a reg does not need restoring. */
26764 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
26765 && (!df_regs_ever_live_p (i + 1)
26766 || call_used_regs[i + 1]))
26768 if (start_reg != i)
26769 arm_emit_vfp_multi_reg_pop (start_reg,
26770 (i - start_reg) / 2,
26771 gen_rtx_REG (SImode,
26772 IP_REGNUM));
26773 start_reg = i + 2;
26776 /* Restore the remaining regs that we have discovered (or possibly
26777 even all of them, if the conditional in the for loop never
26778 fired). */
26779 if (start_reg != i)
26780 arm_emit_vfp_multi_reg_pop (start_reg,
26781 (i - start_reg) / 2,
26782 gen_rtx_REG (SImode, IP_REGNUM));
26785 if (TARGET_IWMMXT)
26787 /* The frame pointer is guaranteed to be non-double-word aligned, as
26788 it is set to double-word-aligned old_stack_pointer - 4. */
26789 rtx insn;
26790 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
26792 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
26793 if (df_regs_ever_live_p (i) && !call_used_regs[i])
26795 rtx addr = gen_frame_mem (V2SImode,
26796 plus_constant (Pmode, hard_frame_pointer_rtx,
26797 - lrm_count * 4));
26798 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
26799 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26800 gen_rtx_REG (V2SImode, i),
26801 NULL_RTX);
26802 lrm_count += 2;
26806 /* saved_regs_mask should contain IP, which holds the old stack pointer
26807 saved at the time the activation record was created. Since SP and IP are
26808 adjacent registers, we can restore the value directly into SP. */
26809 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
26810 saved_regs_mask &= ~(1 << IP_REGNUM);
26811 saved_regs_mask |= (1 << SP_REGNUM);
26813 /* There are two registers left in saved_regs_mask - LR and PC. We
26814 only need to restore LR (the return address), but to
26815 save time we can load it directly into PC, unless we need a
26816 special function exit sequence, or we are not really returning. */
26817 if (really_return
26818 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
26819 && !crtl->calls_eh_return)
26820 /* Delete LR from the register mask, so that LR on
26821 the stack is loaded into the PC in the register mask. */
26822 saved_regs_mask &= ~(1 << LR_REGNUM);
26823 else
26824 saved_regs_mask &= ~(1 << PC_REGNUM);
26826 num_regs = bit_count (saved_regs_mask);
26827 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
26829 emit_insn (gen_blockage ());
26830 /* Unwind the stack to just below the saved registers. */
26831 emit_insn (gen_addsi3 (stack_pointer_rtx,
26832 hard_frame_pointer_rtx,
26833 GEN_INT (- 4 * num_regs)));
26836 arm_emit_multi_reg_pop (saved_regs_mask);
26838 if (IS_INTERRUPT (func_type))
26840 /* Interrupt handlers will have pushed the
26841 IP onto the stack, so restore it now. */
26842 rtx insn;
26843 rtx addr = gen_rtx_MEM (SImode,
26844 gen_rtx_POST_INC (SImode,
26845 stack_pointer_rtx));
26846 set_mem_alias_set (addr, get_frame_alias_set ());
26847 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
26848 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26849 gen_rtx_REG (SImode, IP_REGNUM),
26850 NULL_RTX);
26853 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
26854 return;
26856 if (crtl->calls_eh_return)
26857 emit_insn (gen_addsi3 (stack_pointer_rtx,
26858 stack_pointer_rtx,
26859 GEN_INT (ARM_EH_STACKADJ_REGNUM)));
26861 if (IS_STACKALIGN (func_type))
26862 /* Restore the original stack pointer. Before prologue, the stack was
26863 realigned and the original stack pointer saved in r0. For details,
26864 see comment in arm_expand_prologue. */
26865 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
26867 emit_jump_insn (simple_return_rtx);
26870 /* Generate RTL to represent ARM epilogue. Really_return is true if the
26871 function is not a sibcall. */
26872 void
26873 arm_expand_epilogue (bool really_return)
26875 unsigned long func_type;
26876 unsigned long saved_regs_mask;
26877 int num_regs = 0;
26878 int i;
26879 int amount;
26880 arm_stack_offsets *offsets;
26882 func_type = arm_current_func_type ();
26884 /* Naked functions don't have an epilogue. Hence, generate a return pattern
26885 and let output_return_instruction take care of any instruction emission. */
26886 if (IS_NAKED (func_type)
26887 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
26889 if (really_return)
26890 emit_jump_insn (simple_return_rtx);
26891 return;
26894 /* If we are throwing an exception, then we really must be doing a
26895 return, so we can't tail-call. */
26896 gcc_assert (!crtl->calls_eh_return || really_return);
26898 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
26900 arm_expand_epilogue_apcs_frame (really_return);
26901 return;
26904 /* Get frame offsets for ARM. */
26905 offsets = arm_get_frame_offsets ();
26906 saved_regs_mask = offsets->saved_regs_mask;
26907 num_regs = bit_count (saved_regs_mask);
26909 if (frame_pointer_needed)
26911 rtx insn;
26912 /* Restore stack pointer if necessary. */
26913 if (TARGET_ARM)
26915 /* In ARM mode, frame pointer points to first saved register.
26916 Restore stack pointer to last saved register. */
26917 amount = offsets->frame - offsets->saved_regs;
26919 /* Force out any pending memory operations that reference stacked data
26920 before stack de-allocation occurs. */
26921 emit_insn (gen_blockage ());
26922 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26923 hard_frame_pointer_rtx,
26924 GEN_INT (amount)));
26925 arm_add_cfa_adjust_cfa_note (insn, amount,
26926 stack_pointer_rtx,
26927 hard_frame_pointer_rtx);
26929 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26930 deleted. */
26931 emit_insn (gen_force_register_use (stack_pointer_rtx));
26933 else
26935 /* In Thumb-2 mode, the frame pointer points to the last saved
26936 register. */
26937 amount = offsets->locals_base - offsets->saved_regs;
26938 if (amount)
26940 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
26941 hard_frame_pointer_rtx,
26942 GEN_INT (amount)));
26943 arm_add_cfa_adjust_cfa_note (insn, amount,
26944 hard_frame_pointer_rtx,
26945 hard_frame_pointer_rtx);
26948 /* Force out any pending memory operations that reference stacked data
26949 before stack de-allocation occurs. */
26950 emit_insn (gen_blockage ());
26951 insn = emit_insn (gen_movsi (stack_pointer_rtx,
26952 hard_frame_pointer_rtx));
26953 arm_add_cfa_adjust_cfa_note (insn, 0,
26954 stack_pointer_rtx,
26955 hard_frame_pointer_rtx);
26956 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
26957 deleted. */
26958 emit_insn (gen_force_register_use (stack_pointer_rtx));
26961 else
26963 /* Pop off outgoing args and local frame to adjust stack pointer to
26964 last saved register. */
26965 amount = offsets->outgoing_args - offsets->saved_regs;
26966 if (amount)
26968 rtx tmp;
26969 /* Force out any pending memory operations that reference stacked data
26970 before stack de-allocation occurs. */
26971 emit_insn (gen_blockage ());
26972 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
26973 stack_pointer_rtx,
26974 GEN_INT (amount)));
26975 arm_add_cfa_adjust_cfa_note (tmp, amount,
26976 stack_pointer_rtx, stack_pointer_rtx);
26977 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
26978 not deleted. */
26979 emit_insn (gen_force_register_use (stack_pointer_rtx));
26983 if (TARGET_HARD_FLOAT && TARGET_VFP)
26985 /* Generate VFP register multi-pop. */
26986 int end_reg = LAST_VFP_REGNUM + 1;
26988 /* Scan the registers in reverse order. We need to match
26989 any groupings made in the prologue and generate matching
26990 vldm operations. The need to match groups is because,
26991 unlike pop, vldm can only do consecutive regs. */
26992 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
26993 /* Look for a case where a reg does not need restoring. */
26994 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
26995 && (!df_regs_ever_live_p (i + 1)
26996 || call_used_regs[i + 1]))
26998 /* Restore the regs discovered so far (from reg+2 to
26999 end_reg). */
27000 if (end_reg > i + 2)
27001 arm_emit_vfp_multi_reg_pop (i + 2,
27002 (end_reg - (i + 2)) / 2,
27003 stack_pointer_rtx);
27004 end_reg = i;
27007 /* Restore the remaining regs that we have discovered (or possibly
27008 even all of them, if the conditional in the for loop never
27009 fired). */
27010 if (end_reg > i + 2)
27011 arm_emit_vfp_multi_reg_pop (i + 2,
27012 (end_reg - (i + 2)) / 2,
27013 stack_pointer_rtx);
27016 if (TARGET_IWMMXT)
27017 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27018 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27020 rtx insn;
27021 rtx addr = gen_rtx_MEM (V2SImode,
27022 gen_rtx_POST_INC (SImode,
27023 stack_pointer_rtx));
27024 set_mem_alias_set (addr, get_frame_alias_set ());
27025 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27026 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27027 gen_rtx_REG (V2SImode, i),
27028 NULL_RTX);
27029 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27030 stack_pointer_rtx, stack_pointer_rtx);
27033 if (saved_regs_mask)
27035 rtx insn;
27036 bool return_in_pc = false;
27038 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27039 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27040 && !IS_STACKALIGN (func_type)
27041 && really_return
27042 && crtl->args.pretend_args_size == 0
27043 && saved_regs_mask & (1 << LR_REGNUM)
27044 && !crtl->calls_eh_return)
27046 saved_regs_mask &= ~(1 << LR_REGNUM);
27047 saved_regs_mask |= (1 << PC_REGNUM);
27048 return_in_pc = true;
27051 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27053 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27054 if (saved_regs_mask & (1 << i))
27056 rtx addr = gen_rtx_MEM (SImode,
27057 gen_rtx_POST_INC (SImode,
27058 stack_pointer_rtx));
27059 set_mem_alias_set (addr, get_frame_alias_set ());
27061 if (i == PC_REGNUM)
27063 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27064 XVECEXP (insn, 0, 0) = ret_rtx;
27065 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27066 gen_rtx_REG (SImode, i),
27067 addr);
27068 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27069 insn = emit_jump_insn (insn);
27071 else
27073 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27074 addr));
27075 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27076 gen_rtx_REG (SImode, i),
27077 NULL_RTX);
27078 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27079 stack_pointer_rtx,
27080 stack_pointer_rtx);
27084 else
27086 if (TARGET_LDRD
27087 && current_tune->prefer_ldrd_strd
27088 && !optimize_function_for_size_p (cfun))
27090 if (TARGET_THUMB2)
27091 thumb2_emit_ldrd_pop (saved_regs_mask);
27092 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27093 arm_emit_ldrd_pop (saved_regs_mask);
27094 else
27095 arm_emit_multi_reg_pop (saved_regs_mask);
27097 else
27098 arm_emit_multi_reg_pop (saved_regs_mask);
27101 if (return_in_pc == true)
27102 return;
27105 if (crtl->args.pretend_args_size)
27107 int i, j;
27108 rtx dwarf = NULL_RTX;
27109 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27110 stack_pointer_rtx,
27111 GEN_INT (crtl->args.pretend_args_size)));
27113 RTX_FRAME_RELATED_P (tmp) = 1;
27115 if (cfun->machine->uses_anonymous_args)
27117 /* Restore pretend args. Refer to arm_expand_prologue for how
27118 pretend_args are saved on the stack. */
27119 int num_regs = crtl->args.pretend_args_size / 4;
27120 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27121 for (j = 0, i = 0; j < num_regs; i++)
27122 if (saved_regs_mask & (1 << i))
27124 rtx reg = gen_rtx_REG (SImode, i);
27125 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27126 j++;
27128 REG_NOTES (tmp) = dwarf;
27130 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27131 stack_pointer_rtx, stack_pointer_rtx);
27134 if (!really_return)
27135 return;
27137 if (crtl->calls_eh_return)
27138 emit_insn (gen_addsi3 (stack_pointer_rtx,
27139 stack_pointer_rtx,
27140 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27142 if (IS_STACKALIGN (func_type))
27143 /* Restore the original stack pointer. Before prologue, the stack was
27144 realigned and the original stack pointer saved in r0. For details,
27145 see comment in arm_expand_prologue. */
27146 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27148 emit_jump_insn (simple_return_rtx);
27151 /* Implementation of insn prologue_thumb1_interwork. This is the first
27152 "instruction" of a function called in ARM mode. Switch to Thumb mode. */
27154 const char *
27155 thumb1_output_interwork (void)
27157 const char * name;
27158 FILE *f = asm_out_file;
27160 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27161 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27162 == SYMBOL_REF);
27163 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27165 /* Generate code sequence to switch us into Thumb mode. */
27166 /* The .code 32 directive has already been emitted by
27167 ASM_DECLARE_FUNCTION_NAME. */
27168 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27169 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27171 /* Generate a label, so that the debugger will notice the
27172 change in instruction sets. This label is also used by
27173 the assembler to bypass the ARM code when this function
27174 is called from a Thumb encoded function elsewhere in the
27175 same file. Hence the definition of STUB_NAME here must
27176 agree with the definition in gas/config/tc-arm.c. */
27178 #define STUB_NAME ".real_start_of"
27180 fprintf (f, "\t.code\t16\n");
27181 #ifdef ARM_PE
27182 if (arm_dllexport_name_p (name))
27183 name = arm_strip_name_encoding (name);
27184 #endif
27185 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27186 fprintf (f, "\t.thumb_func\n");
27187 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27189 return "";
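/* Illustrative sketch (not from the original sources): judging from the
   asm_fprintf calls above, the interworking stub emitted for a function NAME
   is expected to look roughly like

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_of<NAME>
	.thumb_func
   .real_start_of<NAME>:

   where the exact symbol spelling also depends on the %U user-label-prefix
   expansion.  */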
27192 /* Handle the case of a double word load into a low register from
27193 a computed memory address. The computed address may involve a
27194 register which is overwritten by the load. */
27195 const char *
27196 thumb_load_double_from_address (rtx *operands)
27198 rtx addr;
27199 rtx base;
27200 rtx offset;
27201 rtx arg1;
27202 rtx arg2;
27204 gcc_assert (REG_P (operands[0]));
27205 gcc_assert (MEM_P (operands[1]));
27207 /* Get the memory address. */
27208 addr = XEXP (operands[1], 0);
27210 /* Work out how the memory address is computed. */
27211 switch (GET_CODE (addr))
27213 case REG:
27214 operands[2] = adjust_address (operands[1], SImode, 4);
27216 if (REGNO (operands[0]) == REGNO (addr))
27218 output_asm_insn ("ldr\t%H0, %2", operands);
27219 output_asm_insn ("ldr\t%0, %1", operands);
27221 else
27223 output_asm_insn ("ldr\t%0, %1", operands);
27224 output_asm_insn ("ldr\t%H0, %2", operands);
27226 break;
27228 case CONST:
27229 /* Compute <address> + 4 for the high order load. */
27230 operands[2] = adjust_address (operands[1], SImode, 4);
27232 output_asm_insn ("ldr\t%0, %1", operands);
27233 output_asm_insn ("ldr\t%H0, %2", operands);
27234 break;
27236 case PLUS:
27237 arg1 = XEXP (addr, 0);
27238 arg2 = XEXP (addr, 1);
27240 if (CONSTANT_P (arg1))
27241 base = arg2, offset = arg1;
27242 else
27243 base = arg1, offset = arg2;
27245 gcc_assert (REG_P (base));
27247 /* Catch the case of <address> = <reg> + <reg> */
27248 if (REG_P (offset))
27250 int reg_offset = REGNO (offset);
27251 int reg_base = REGNO (base);
27252 int reg_dest = REGNO (operands[0]);
27254 /* Add the base and offset registers together into the
27255 higher destination register. */
27256 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27257 reg_dest + 1, reg_base, reg_offset);
27259 /* Load the lower destination register from the address in
27260 the higher destination register. */
27261 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27262 reg_dest, reg_dest + 1);
27264 /* Load the higher destination register from its own address
27265 plus 4. */
27266 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27267 reg_dest + 1, reg_dest + 1);
27269 else
27271 /* Compute <address> + 4 for the high order load. */
27272 operands[2] = adjust_address (operands[1], SImode, 4);
27274 /* If the computed address is held in the low order register
27275 then load the high order register first, otherwise always
27276 load the low order register first. */
27277 if (REGNO (operands[0]) == REGNO (base))
27279 output_asm_insn ("ldr\t%H0, %2", operands);
27280 output_asm_insn ("ldr\t%0, %1", operands);
27282 else
27284 output_asm_insn ("ldr\t%0, %1", operands);
27285 output_asm_insn ("ldr\t%H0, %2", operands);
27288 break;
27290 case LABEL_REF:
27291 /* With no registers to worry about we can just load the value
27292 directly. */
27293 operands[2] = adjust_address (operands[1], SImode, 4);
27295 output_asm_insn ("ldr\t%H0, %2", operands);
27296 output_asm_insn ("ldr\t%0, %1", operands);
27297 break;
27299 default:
27300 gcc_unreachable ();
27303 return "";
27306 const char *
27307 thumb_output_move_mem_multiple (int n, rtx *operands)
27309 rtx tmp;
27311 switch (n)
27313 case 2:
27314 if (REGNO (operands[4]) > REGNO (operands[5]))
27316 tmp = operands[4];
27317 operands[4] = operands[5];
27318 operands[5] = tmp;
27320 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27321 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27322 break;
27324 case 3:
27325 if (REGNO (operands[4]) > REGNO (operands[5]))
27327 tmp = operands[4];
27328 operands[4] = operands[5];
27329 operands[5] = tmp;
27331 if (REGNO (operands[5]) > REGNO (operands[6]))
27333 tmp = operands[5];
27334 operands[5] = operands[6];
27335 operands[6] = tmp;
27337 if (REGNO (operands[4]) > REGNO (operands[5]))
27339 tmp = operands[4];
27340 operands[4] = operands[5];
27341 operands[5] = tmp;
27344 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27345 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27346 break;
27348 default:
27349 gcc_unreachable ();
27352 return "";
27355 /* Output a call-via instruction for thumb state. */
27356 const char *
27357 thumb_call_via_reg (rtx reg)
27359 int regno = REGNO (reg);
27360 rtx *labelp;
27362 gcc_assert (regno < LR_REGNUM);
27364 /* If we are in the normal text section we can use a single instance
27365 per compilation unit. If we are doing function sections, then we need
27366 an entry per section, since we can't rely on reachability. */
27367 if (in_section == text_section)
27369 thumb_call_reg_needed = 1;
27371 if (thumb_call_via_label[regno] == NULL)
27372 thumb_call_via_label[regno] = gen_label_rtx ();
27373 labelp = thumb_call_via_label + regno;
27375 else
27377 if (cfun->machine->call_via[regno] == NULL)
27378 cfun->machine->call_via[regno] = gen_label_rtx ();
27379 labelp = cfun->machine->call_via + regno;
27382 output_asm_insn ("bl\t%a0", labelp);
27383 return "";
27386 /* Routines for generating rtl. */
27387 void
27388 thumb_expand_movmemqi (rtx *operands)
27390 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27391 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27392 HOST_WIDE_INT len = INTVAL (operands[2]);
27393 HOST_WIDE_INT offset = 0;
27395 while (len >= 12)
27397 emit_insn (gen_movmem12b (out, in, out, in));
27398 len -= 12;
27401 if (len >= 8)
27403 emit_insn (gen_movmem8b (out, in, out, in));
27404 len -= 8;
27407 if (len >= 4)
27409 rtx reg = gen_reg_rtx (SImode);
27410 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27411 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27412 len -= 4;
27413 offset += 4;
27416 if (len >= 2)
27418 rtx reg = gen_reg_rtx (HImode);
27419 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27420 plus_constant (Pmode, in,
27421 offset))));
27422 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27423 offset)),
27424 reg));
27425 len -= 2;
27426 offset += 2;
27429 if (len)
27431 rtx reg = gen_reg_rtx (QImode);
27432 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27433 plus_constant (Pmode, in,
27434 offset))));
27435 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27436 offset)),
27437 reg));
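/* Illustrative note (a sketch, assuming the movmem12b/movmem8b patterns
   post-increment OUT and IN as their operand usage suggests): a 23-byte copy
   expands as one 12-byte block move, one 8-byte block move, then a halfword
   move and a byte move for the remaining 3 bytes, with OFFSET only tracking
   the tail copies.  */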
27441 void
27442 thumb_reload_out_hi (rtx *operands)
27444 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27447 /* Handle reading a half-word from memory during reload. */
27448 void
27449 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
27451 gcc_unreachable ();
27454 /* Return the length of a function name prefix
27455 that starts with the character 'c'. */
27456 static int
27457 arm_get_strip_length (int c)
27459 switch (c)
27461 ARM_NAME_ENCODING_LENGTHS
27462 default: return 0;
27466 /* Return a pointer to a function's name with any
27467 and all prefix encodings stripped from it. */
27468 const char *
27469 arm_strip_name_encoding (const char *name)
27471 int skip;
27473 while ((skip = arm_get_strip_length (* name)))
27474 name += skip;
27476 return name;
27479 /* If there is a '*' anywhere in the name's prefix, then
27480 emit the stripped name verbatim, otherwise prepend an
27481 underscore if leading underscores are being used. */
27482 void
27483 arm_asm_output_labelref (FILE *stream, const char *name)
27485 int skip;
27486 int verbatim = 0;
27488 while ((skip = arm_get_strip_length (* name)))
27490 verbatim |= (*name == '*');
27491 name += skip;
27494 if (verbatim)
27495 fputs (name, stream);
27496 else
27497 asm_fprintf (stream, "%U%s", name);
27500 /* This function is used to emit an EABI tag and its associated value.
27501 We emit the numerical value of the tag in case the assembler does not
27502 support textual tags (e.g. gas prior to 2.20). If requested we include
27503 the tag name in a comment so that anyone reading the assembler output
27504 will know which tag is being set.
27506 This function is not static because arm-c.c needs it too. */
27508 void
27509 arm_emit_eabi_attribute (const char *name, int num, int val)
27511 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
27512 if (flag_verbose_asm || flag_debug_asm)
27513 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
27514 asm_fprintf (asm_out_file, "\n");
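/* Illustration (assumed output, not captured from a build): with
   -fverbose-asm, a call such as

	arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);

   should emit roughly

	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal

   i.e. the numeric tag and value, followed by a comment naming the tag
   (assuming '@' as ASM_COMMENT_START).  */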
27517 static void
27518 arm_file_start (void)
27520 int val;
27522 if (TARGET_UNIFIED_ASM)
27523 asm_fprintf (asm_out_file, "\t.syntax unified\n");
27525 if (TARGET_BPABI)
27527 const char *fpu_name;
27528 if (arm_selected_arch)
27529 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
27530 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
27531 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
27532 else
27534 const char* truncated_name
27535 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
27536 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
27539 if (TARGET_SOFT_FLOAT)
27541 fpu_name = "softvfp";
27543 else
27545 fpu_name = arm_fpu_desc->name;
27546 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
27548 if (TARGET_HARD_FLOAT)
27549 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
27550 if (TARGET_HARD_FLOAT_ABI)
27551 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27554 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
27556 /* Some of these attributes only apply when the corresponding features
27557 are used. However we don't have any easy way of figuring this out.
27558 Conservatively record the setting that would have been used. */
27560 if (flag_rounding_math)
27561 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27563 if (!flag_unsafe_math_optimizations)
27565 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27566 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27568 if (flag_signaling_nans)
27569 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27571 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27572 flag_finite_math_only ? 1 : 3);
27574 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27575 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27576 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27577 flag_short_enums ? 1 : 2);
27579 /* Tag_ABI_optimization_goals. */
27580 if (optimize_size)
27581 val = 4;
27582 else if (optimize >= 2)
27583 val = 2;
27584 else if (optimize)
27585 val = 1;
27586 else
27587 val = 6;
27588 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
27590 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
27591 unaligned_access);
27593 if (arm_fp16_format)
27594 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
27595 (int) arm_fp16_format);
27597 if (arm_lang_output_object_attributes_hook)
27598 arm_lang_output_object_attributes_hook();
27601 default_file_start ();
27604 static void
27605 arm_file_end (void)
27607 int regno;
27609 if (NEED_INDICATE_EXEC_STACK)
27610 /* Add .note.GNU-stack. */
27611 file_end_indicate_exec_stack ();
27613 if (! thumb_call_reg_needed)
27614 return;
27616 switch_to_section (text_section);
27617 asm_fprintf (asm_out_file, "\t.code 16\n");
27618 ASM_OUTPUT_ALIGN (asm_out_file, 1);
27620 for (regno = 0; regno < LR_REGNUM; regno++)
27622 rtx label = thumb_call_via_label[regno];
27624 if (label != 0)
27626 targetm.asm_out.internal_label (asm_out_file, "L",
27627 CODE_LABEL_NUMBER (label));
27628 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
27633 #ifndef ARM_PE
27634 /* Symbols in the text segment can be accessed without indirecting via the
27635 constant pool; it may take an extra binary operation, but this is still
27636 faster than indirecting via memory. Don't do this when not optimizing,
27637 since we won't be calculating all of the offsets necessary to do this
27638 simplification. */
27640 static void
27641 arm_encode_section_info (tree decl, rtx rtl, int first)
27643 if (optimize > 0 && TREE_CONSTANT (decl))
27644 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
27646 default_encode_section_info (decl, rtl, first);
27648 #endif /* !ARM_PE */
27650 static void
27651 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
27653 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
27654 && !strcmp (prefix, "L"))
27656 arm_ccfsm_state = 0;
27657 arm_target_insn = NULL;
27659 default_internal_label (stream, prefix, labelno);
27662 /* Output code to add DELTA to the first argument, and then jump
27663 to FUNCTION. Used for C++ multiple inheritance. */
27664 static void
27665 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
27666 HOST_WIDE_INT delta,
27667 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
27668 tree function)
27670 static int thunk_label = 0;
27671 char label[256];
27672 char labelpc[256];
27673 int mi_delta = delta;
27674 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
27675 int shift = 0;
27676 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
27677 ? 1 : 0);
27678 if (mi_delta < 0)
27679 mi_delta = - mi_delta;
27681 final_start_function (emit_barrier (), file, 1);
27683 if (TARGET_THUMB1)
27685 int labelno = thunk_label++;
27686 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
27687 /* Thunks are entered in ARM mode when available. */
27688 if (TARGET_THUMB1_ONLY)
27690 /* push r3 so we can use it as a temporary. */
27691 /* TODO: Omit this save if r3 is not used. */
27692 fputs ("\tpush {r3}\n", file);
27693 fputs ("\tldr\tr3, ", file);
27695 else
27697 fputs ("\tldr\tr12, ", file);
27699 assemble_name (file, label);
27700 fputc ('\n', file);
27701 if (flag_pic)
27703 /* If we are generating PIC, the ldr instruction below loads
27704 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
27705 the address of the add + 8, so we have:
27707 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
27708 = target + 1.
27710 Note that we have "+ 1" because some versions of GNU ld
27711 don't set the low bit of the result for R_ARM_REL32
27712 relocations against thumb function symbols.
27713 On ARMv6M this is +4, not +8. */
27714 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
27715 assemble_name (file, labelpc);
27716 fputs (":\n", file);
27717 if (TARGET_THUMB1_ONLY)
27719 /* This is 2 insns after the start of the thunk, so we know it
27720 is 4-byte aligned. */
27721 fputs ("\tadd\tr3, pc, r3\n", file);
27722 fputs ("\tmov r12, r3\n", file);
27724 else
27725 fputs ("\tadd\tr12, pc, r12\n", file);
27727 else if (TARGET_THUMB1_ONLY)
27728 fputs ("\tmov r12, r3\n", file);
27730 if (TARGET_THUMB1_ONLY)
27732 if (mi_delta > 255)
27734 fputs ("\tldr\tr3, ", file);
27735 assemble_name (file, label);
27736 fputs ("+4\n", file);
27737 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
27738 mi_op, this_regno, this_regno);
27740 else if (mi_delta != 0)
27742 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
27743 mi_op, this_regno, this_regno,
27744 mi_delta);
27747 else
27749 /* TODO: Use movw/movt for large constants when available. */
27750 while (mi_delta != 0)
27752 if ((mi_delta & (3 << shift)) == 0)
27753 shift += 2;
27754 else
27756 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
27757 mi_op, this_regno, this_regno,
27758 mi_delta & (0xff << shift));
27759 mi_delta &= ~(0xff << shift);
27760 shift += 8;
27764 if (TARGET_THUMB1)
27766 if (TARGET_THUMB1_ONLY)
27767 fputs ("\tpop\t{r3}\n", file);
27769 fprintf (file, "\tbx\tr12\n");
27770 ASM_OUTPUT_ALIGN (file, 2);
27771 assemble_name (file, label);
27772 fputs (":\n", file);
27773 if (flag_pic)
27775 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
27776 rtx tem = XEXP (DECL_RTL (function), 0);
27777 tem = plus_constant (GET_MODE (tem), tem, -7);
27778 tem = gen_rtx_MINUS (GET_MODE (tem),
27779 tem,
27780 gen_rtx_SYMBOL_REF (Pmode,
27781 ggc_strdup (labelpc)));
27782 assemble_integer (tem, 4, BITS_PER_WORD, 1);
27784 else
27785 /* Output ".word .LTHUNKn". */
27786 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
27788 if (TARGET_THUMB1_ONLY && mi_delta > 255)
27789 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
27791 else
27793 fputs ("\tb\t", file);
27794 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
27795 if (NEED_PLT_RELOC)
27796 fputs ("(PLT)", file);
27797 fputc ('\n', file);
27800 final_end_function ();
27804 arm_emit_vector_const (FILE *file, rtx x)
27806 int i;
27807 const char * pattern;
27809 gcc_assert (GET_CODE (x) == CONST_VECTOR);
27811 switch (GET_MODE (x))
27813 case V2SImode: pattern = "%08x"; break;
27814 case V4HImode: pattern = "%04x"; break;
27815 case V8QImode: pattern = "%02x"; break;
27816 default: gcc_unreachable ();
27819 fprintf (file, "0x");
27820 for (i = CONST_VECTOR_NUNITS (x); i--;)
27822 rtx element;
27824 element = CONST_VECTOR_ELT (x, i);
27825 fprintf (file, pattern, INTVAL (element));
27828 return 1;
27831 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
27832 HFmode constant pool entries are actually loaded with ldr. */
27833 void
27834 arm_emit_fp16_const (rtx c)
27836 REAL_VALUE_TYPE r;
27837 long bits;
27839 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
27840 bits = real_to_target (NULL, &r, HFmode);
27841 if (WORDS_BIG_ENDIAN)
27842 assemble_zeros (2);
27843 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
27844 if (!WORDS_BIG_ENDIAN)
27845 assemble_zeros (2);
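/* Note (reasoning from the code above, not a documented guarantee): the two
   padding bytes are placed before the value on big-endian targets and after
   it on little-endian ones, so in either case the fp16 bit pattern ends up in
   the low half of the word that ldr loads.  */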
27848 const char *
27849 arm_output_load_gr (rtx *operands)
27851 rtx reg;
27852 rtx offset;
27853 rtx wcgr;
27854 rtx sum;
27856 if (!MEM_P (operands [1])
27857 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
27858 || !REG_P (reg = XEXP (sum, 0))
27859 || !CONST_INT_P (offset = XEXP (sum, 1))
27860 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
27861 return "wldrw%?\t%0, %1";
27863 /* Fix up an out-of-range load of a GR register. */
27864 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
27865 wcgr = operands[0];
27866 operands[0] = reg;
27867 output_asm_insn ("ldr%?\t%0, %1", operands);
27869 operands[0] = wcgr;
27870 operands[1] = reg;
27871 output_asm_insn ("tmcr%?\t%0, %1", operands);
27872 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
27874 return "";
27877 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
27879 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
27880 named arg and all anonymous args onto the stack.
27881 XXX I know the prologue shouldn't be pushing registers, but it is faster
27882 that way. */
27884 static void
27885 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
27886 enum machine_mode mode,
27887 tree type,
27888 int *pretend_size,
27889 int second_time ATTRIBUTE_UNUSED)
27891 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
27892 int nregs;
27894 cfun->machine->uses_anonymous_args = 1;
27895 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
27897 nregs = pcum->aapcs_ncrn;
27898 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
27899 nregs++;
27901 else
27902 nregs = pcum->nregs;
27904 if (nregs < NUM_ARG_REGS)
27905 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
27908 /* We can't rely on the caller doing the proper promotion when
27909 using APCS or ATPCS. */
27911 static bool
27912 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
27914 return !TARGET_AAPCS_BASED;
27917 static enum machine_mode
27918 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
27919 enum machine_mode mode,
27920 int *punsignedp ATTRIBUTE_UNUSED,
27921 const_tree fntype ATTRIBUTE_UNUSED,
27922 int for_return ATTRIBUTE_UNUSED)
27924 if (GET_MODE_CLASS (mode) == MODE_INT
27925 && GET_MODE_SIZE (mode) < 4)
27926 return SImode;
27928 return mode;
27931 /* AAPCS based ABIs use short enums by default. */
27933 static bool
27934 arm_default_short_enums (void)
27936 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
27940 /* AAPCS requires that anonymous bitfields affect structure alignment. */
27942 static bool
27943 arm_align_anon_bitfield (void)
27945 return TARGET_AAPCS_BASED;
27949 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
27951 static tree
27952 arm_cxx_guard_type (void)
27954 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
27958 /* The EABI says test the least significant bit of a guard variable. */
27960 static bool
27961 arm_cxx_guard_mask_bit (void)
27963 return TARGET_AAPCS_BASED;
27967 /* The EABI specifies that all array cookies are 8 bytes long. */
27969 static tree
27970 arm_get_cookie_size (tree type)
27972 tree size;
27974 if (!TARGET_AAPCS_BASED)
27975 return default_cxx_get_cookie_size (type);
27977 size = build_int_cst (sizetype, 8);
27978 return size;
27982 /* The EABI says that array cookies should also contain the element size. */
27984 static bool
27985 arm_cookie_has_size (void)
27987 return TARGET_AAPCS_BASED;
27991 /* The EABI says constructors and destructors should return a pointer to
27992 the object constructed/destroyed. */
27994 static bool
27995 arm_cxx_cdtor_returns_this (void)
27997 return TARGET_AAPCS_BASED;
28000 /* The EABI says that an inline function may never be the key
28001 method. */
28003 static bool
28004 arm_cxx_key_method_may_be_inline (void)
28006 return !TARGET_AAPCS_BASED;
28009 static void
28010 arm_cxx_determine_class_data_visibility (tree decl)
28012 if (!TARGET_AAPCS_BASED
28013 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28014 return;
28016 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28017 is exported. However, on systems without dynamic vague linkage,
28018 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28019 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28020 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28021 else
28022 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28023 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28026 static bool
28027 arm_cxx_class_data_always_comdat (void)
28029 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28030 vague linkage if the class has no key function. */
28031 return !TARGET_AAPCS_BASED;
28035 /* The EABI says __aeabi_atexit should be used to register static
28036 destructors. */
28038 static bool
28039 arm_cxx_use_aeabi_atexit (void)
28041 return TARGET_AAPCS_BASED;
28045 void
28046 arm_set_return_address (rtx source, rtx scratch)
28048 arm_stack_offsets *offsets;
28049 HOST_WIDE_INT delta;
28050 rtx addr;
28051 unsigned long saved_regs;
28053 offsets = arm_get_frame_offsets ();
28054 saved_regs = offsets->saved_regs_mask;
28056 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28057 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28058 else
28060 if (frame_pointer_needed)
28061 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28062 else
28064 /* LR will be the first saved register. */
28065 delta = offsets->outgoing_args - (offsets->frame + 4);
28068 if (delta >= 4096)
28070 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28071 GEN_INT (delta & ~4095)));
28072 addr = scratch;
28073 delta &= 4095;
28075 else
28076 addr = stack_pointer_rtx;
28078 addr = plus_constant (Pmode, addr, delta);
28080 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28085 void
28086 thumb_set_return_address (rtx source, rtx scratch)
28088 arm_stack_offsets *offsets;
28089 HOST_WIDE_INT delta;
28090 HOST_WIDE_INT limit;
28091 int reg;
28092 rtx addr;
28093 unsigned long mask;
28095 emit_use (source);
28097 offsets = arm_get_frame_offsets ();
28098 mask = offsets->saved_regs_mask;
28099 if (mask & (1 << LR_REGNUM))
28101 limit = 1024;
28102 /* Find the saved regs. */
28103 if (frame_pointer_needed)
28105 delta = offsets->soft_frame - offsets->saved_args;
28106 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28107 if (TARGET_THUMB1)
28108 limit = 128;
28110 else
28112 delta = offsets->outgoing_args - offsets->saved_args;
28113 reg = SP_REGNUM;
28115 /* Allow for the stack frame. */
28116 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28117 delta -= 16;
28118 /* The link register is always the first saved register. */
28119 delta -= 4;
28121 /* Construct the address. */
28122 addr = gen_rtx_REG (SImode, reg);
28123 if (delta > limit)
28125 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28126 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28127 addr = scratch;
28129 else
28130 addr = plus_constant (Pmode, addr, delta);
28132 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28134 else
28135 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28138 /* Implements target hook vector_mode_supported_p. */
28139 bool
28140 arm_vector_mode_supported_p (enum machine_mode mode)
28142 /* Neon also supports V2SImode, etc. listed in the clause below. */
28143 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28144 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28145 return true;
28147 if ((TARGET_NEON || TARGET_IWMMXT)
28148 && ((mode == V2SImode)
28149 || (mode == V4HImode)
28150 || (mode == V8QImode)))
28151 return true;
28153 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28154 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28155 || mode == V2HAmode))
28156 return true;
28158 return false;
28161 /* Implements target hook array_mode_supported_p. */
28163 static bool
28164 arm_array_mode_supported_p (enum machine_mode mode,
28165 unsigned HOST_WIDE_INT nelems)
28167 if (TARGET_NEON
28168 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28169 && (nelems >= 2 && nelems <= 4))
28170 return true;
28172 return false;
28175 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28176 registers when autovectorizing for Neon, at least until multiple vector
28177 widths are supported properly by the middle-end. */
28179 static enum machine_mode
28180 arm_preferred_simd_mode (enum machine_mode mode)
28182 if (TARGET_NEON)
28183 switch (mode)
28185 case SFmode:
28186 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28187 case SImode:
28188 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28189 case HImode:
28190 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28191 case QImode:
28192 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28193 case DImode:
28194 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28195 return V2DImode;
28196 break;
28198 default:;
28201 if (TARGET_REALLY_IWMMXT)
28202 switch (mode)
28204 case SImode:
28205 return V2SImode;
28206 case HImode:
28207 return V4HImode;
28208 case QImode:
28209 return V8QImode;
28211 default:;
28214 return word_mode;
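/* Summary sketch (derived from the switch above): with NEON the default
   preference is the quadword modes, e.g. SFmode -> V4SFmode and QImode ->
   V16QImode; -mvectorize-with-neon-double selects the doubleword modes
   instead (V2SFmode, V8QImode, ...), and DImode is only vectorized (as
   V2DImode) in the quadword case.  */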
28217 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28219 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28220 using r0-r4 for function arguments, r7 for the stack frame, and not have
28221 enough left over to do doubleword arithmetic. For Thumb-2 all the
28222 potentially problematic instructions accept high registers so this is not
28223 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28224 that require many low registers. */
28225 static bool
28226 arm_class_likely_spilled_p (reg_class_t rclass)
28228 if ((TARGET_THUMB1 && rclass == LO_REGS)
28229 || rclass == CC_REG)
28230 return true;
28232 return false;
28235 /* Implements target hook small_register_classes_for_mode_p. */
28236 bool
28237 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
28239 return TARGET_THUMB1;
28242 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28243 ARM insns and therefore guarantee that the shift count is modulo 256.
28244 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28245 guarantee no particular behavior for out-of-range counts. */
28247 static unsigned HOST_WIDE_INT
28248 arm_shift_truncation_mask (enum machine_mode mode)
28250 return mode == SImode ? 255 : 0;
28254 /* Map internal gcc register numbers to DWARF2 register numbers. */
28256 unsigned int
28257 arm_dbx_register_number (unsigned int regno)
28259 if (regno < 16)
28260 return regno;
28262 if (IS_VFP_REGNUM (regno))
28264 /* See comment in arm_dwarf_register_span. */
28265 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28266 return 64 + regno - FIRST_VFP_REGNUM;
28267 else
28268 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28271 if (IS_IWMMXT_GR_REGNUM (regno))
28272 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28274 if (IS_IWMMXT_REGNUM (regno))
28275 return 112 + regno - FIRST_IWMMXT_REGNUM;
28277 gcc_unreachable ();
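/* Illustration (restating the mapping above): core registers keep their own
   numbers (0-15); VFP registers in the single-precision-capable bank use the
   legacy 64-based numbering, while the upper bank maps into the 256-based
   D-register range; iWMMXt GR registers start at 104 and the iWMMXt data
   registers at 112.  */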
28280 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28281 GCC models them as 64 32-bit registers, so we need to describe this to
28282 the DWARF generation code. Other registers can use the default. */
28283 static rtx
28284 arm_dwarf_register_span (rtx rtl)
28286 enum machine_mode mode;
28287 unsigned regno;
28288 rtx parts[8];
28289 int nregs;
28290 int i;
28292 regno = REGNO (rtl);
28293 if (!IS_VFP_REGNUM (regno))
28294 return NULL_RTX;
28296 /* XXX FIXME: The EABI defines two VFP register ranges:
28297 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28298 256-287: D0-D31
28299 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28300 corresponding D register. Until GDB supports this, we shall use the
28301 legacy encodings. We also use these encodings for D0-D15 for
28302 compatibility with older debuggers. */
28303 mode = GET_MODE (rtl);
28304 if (GET_MODE_SIZE (mode) < 8)
28305 return NULL_RTX;
28307 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28309 nregs = GET_MODE_SIZE (mode) / 4;
28310 for (i = 0; i < nregs; i += 2)
28311 if (TARGET_BIG_END)
28313 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28314 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28316 else
28318 parts[i] = gen_rtx_REG (SImode, regno + i);
28319 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28322 else
28324 nregs = GET_MODE_SIZE (mode) / 8;
28325 for (i = 0; i < nregs; i++)
28326 parts[i] = gen_rtx_REG (DImode, regno + i);
28329 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
28332 #if ARM_UNWIND_INFO
28333 /* Emit unwind directives for a store-multiple instruction or stack pointer
28334 push during alignment.
28335 These should only ever be generated by the function prologue code, so
28336 expect them to have a particular form. */
28338 static void
28339 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
28341 int i;
28342 HOST_WIDE_INT offset;
28343 HOST_WIDE_INT nregs;
28344 int reg_size;
28345 unsigned reg;
28346 unsigned lastreg;
28347 rtx e;
28349 e = XVECEXP (p, 0, 0);
28350 if (GET_CODE (e) != SET)
28351 abort ();
28353 /* First insn will adjust the stack pointer. */
28354 if (GET_CODE (e) != SET
28355 || !REG_P (XEXP (e, 0))
28356 || REGNO (XEXP (e, 0)) != SP_REGNUM
28357 || GET_CODE (XEXP (e, 1)) != PLUS)
28358 abort ();
28360 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
28361 nregs = XVECLEN (p, 0) - 1;
28363 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
28364 if (reg < 16)
28366 /* The function prologue may also push pc, but not annotate it as it is
28367 never restored. We turn this into a stack pointer adjustment. */
28368 if (nregs * 4 == offset - 4)
28370 fprintf (asm_out_file, "\t.pad #4\n");
28371 offset -= 4;
28373 reg_size = 4;
28374 fprintf (asm_out_file, "\t.save {");
28376 else if (IS_VFP_REGNUM (reg))
28378 reg_size = 8;
28379 fprintf (asm_out_file, "\t.vsave {");
28381 else
28382 /* Unknown register type. */
28383 abort ();
28385 /* If the stack increment doesn't match the size of the saved registers,
28386 something has gone horribly wrong. */
28387 if (offset != nregs * reg_size)
28388 abort ();
28390 offset = 0;
28391 lastreg = 0;
28392 /* The remaining insns will describe the stores. */
28393 for (i = 1; i <= nregs; i++)
28395 /* Expect (set (mem <addr>) (reg)).
28396 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28397 e = XVECEXP (p, 0, i);
28398 if (GET_CODE (e) != SET
28399 || !MEM_P (XEXP (e, 0))
28400 || !REG_P (XEXP (e, 1)))
28401 abort ();
28403 reg = REGNO (XEXP (e, 1));
28404 if (reg < lastreg)
28405 abort ();
28407 if (i != 1)
28408 fprintf (asm_out_file, ", ");
28409 /* We can't use %r for vfp because we need to use the
28410 double precision register names. */
28411 if (IS_VFP_REGNUM (reg))
28412 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
28413 else
28414 asm_fprintf (asm_out_file, "%r", reg);
28416 #ifdef ENABLE_CHECKING
28417 /* Check that the addresses are consecutive. */
28418 e = XEXP (XEXP (e, 0), 0);
28419 if (GET_CODE (e) == PLUS)
28421 offset += reg_size;
28422 if (!REG_P (XEXP (e, 0))
28423 || REGNO (XEXP (e, 0)) != SP_REGNUM
28424 || !CONST_INT_P (XEXP (e, 1))
28425 || offset != INTVAL (XEXP (e, 1)))
28426 abort ();
28428 else if (i != 1
28429 || !REG_P (e)
28430 || REGNO (e) != SP_REGNUM)
28431 abort ();
28432 #endif
28434 fprintf (asm_out_file, "}\n");
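/* Sketch of the expected output (assumed, not captured from a build): for a
   prologue push of, say, {r4, r5, lr} this routine should emit

	.save {r4, r5, lr}

   and for a VFP store-multiple a corresponding .vsave {d8, ...} line.  */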
28437 /* Emit unwind directives for a SET. */
28439 static void
28440 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
28442 rtx e0;
28443 rtx e1;
28444 unsigned reg;
28446 e0 = XEXP (p, 0);
28447 e1 = XEXP (p, 1);
28448 switch (GET_CODE (e0))
28450 case MEM:
28451 /* Pushing a single register. */
28452 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
28453 || !REG_P (XEXP (XEXP (e0, 0), 0))
28454 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
28455 abort ();
28457 asm_fprintf (asm_out_file, "\t.save ");
28458 if (IS_VFP_REGNUM (REGNO (e1)))
28459 asm_fprintf(asm_out_file, "{d%d}\n",
28460 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
28461 else
28462 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
28463 break;
28465 case REG:
28466 if (REGNO (e0) == SP_REGNUM)
28468 /* A stack increment. */
28469 if (GET_CODE (e1) != PLUS
28470 || !REG_P (XEXP (e1, 0))
28471 || REGNO (XEXP (e1, 0)) != SP_REGNUM
28472 || !CONST_INT_P (XEXP (e1, 1)))
28473 abort ();
28475 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
28476 -INTVAL (XEXP (e1, 1)));
28478 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
28480 HOST_WIDE_INT offset;
28482 if (GET_CODE (e1) == PLUS)
28484 if (!REG_P (XEXP (e1, 0))
28485 || !CONST_INT_P (XEXP (e1, 1)))
28486 abort ();
28487 reg = REGNO (XEXP (e1, 0));
28488 offset = INTVAL (XEXP (e1, 1));
28489 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
28490 HARD_FRAME_POINTER_REGNUM, reg,
28491 offset);
28493 else if (REG_P (e1))
28495 reg = REGNO (e1);
28496 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
28497 HARD_FRAME_POINTER_REGNUM, reg);
28499 else
28500 abort ();
28502 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
28504 /* Move from sp to reg. */
28505 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
28507 else if (GET_CODE (e1) == PLUS
28508 && REG_P (XEXP (e1, 0))
28509 && REGNO (XEXP (e1, 0)) == SP_REGNUM
28510 && CONST_INT_P (XEXP (e1, 1)))
28512 /* Set reg to offset from sp. */
28513 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
28514 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
28516 else
28517 abort ();
28518 break;
28520 default:
28521 abort ();
28526 /* Emit unwind directives for the given insn. */
28528 static void
28529 arm_unwind_emit (FILE * asm_out_file, rtx insn)
28531 rtx note, pat;
28532 bool handled_one = false;
28534 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28535 return;
28537 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28538 && (TREE_NOTHROW (current_function_decl)
28539 || crtl->all_throwers_are_sibcalls))
28540 return;
28542 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
28543 return;
28545 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
28547 switch (REG_NOTE_KIND (note))
28549 case REG_FRAME_RELATED_EXPR:
28550 pat = XEXP (note, 0);
28551 goto found;
28553 case REG_CFA_REGISTER:
28554 pat = XEXP (note, 0);
28555 if (pat == NULL)
28557 pat = PATTERN (insn);
28558 if (GET_CODE (pat) == PARALLEL)
28559 pat = XVECEXP (pat, 0, 0);
28562 /* Only emitted for IS_STACKALIGN re-alignment. */
28564 rtx dest, src;
28565 unsigned reg;
28567 src = SET_SRC (pat);
28568 dest = SET_DEST (pat);
28570 gcc_assert (src == stack_pointer_rtx);
28571 reg = REGNO (dest);
28572 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28573 reg + 0x90, reg);
28575 handled_one = true;
28576 break;
28578 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
28579 to get correct DWARF information for shrink-wrapping. We should not
28580 emit unwind information for it because these notes are used either for
28581 pretend arguments or to adjust sp and restore registers from the
28582 stack. */
28583 case REG_CFA_ADJUST_CFA:
28584 case REG_CFA_RESTORE:
28585 return;
28587 case REG_CFA_DEF_CFA:
28588 case REG_CFA_EXPRESSION:
28589 case REG_CFA_OFFSET:
28590 /* ??? Only handling here what we actually emit. */
28591 gcc_unreachable ();
28593 default:
28594 break;
28597 if (handled_one)
28598 return;
28599 pat = PATTERN (insn);
28600 found:
28602 switch (GET_CODE (pat))
28604 case SET:
28605 arm_unwind_emit_set (asm_out_file, pat);
28606 break;
28608 case SEQUENCE:
28609 /* Store multiple. */
28610 arm_unwind_emit_sequence (asm_out_file, pat);
28611 break;
28613 default:
28614 abort();
28619 /* Output a reference from a function exception table to the type_info
28620 object X. The EABI specifies that the symbol should be relocated by
28621 an R_ARM_TARGET2 relocation. */
28623 static bool
28624 arm_output_ttype (rtx x)
28626 fputs ("\t.word\t", asm_out_file);
28627 output_addr_const (asm_out_file, x);
28628 /* Use special relocations for symbol references. */
28629 if (!CONST_INT_P (x))
28630 fputs ("(TARGET2)", asm_out_file);
28631 fputc ('\n', asm_out_file);
28633 return TRUE;
28636 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
28638 static void
28639 arm_asm_emit_except_personality (rtx personality)
28641 fputs ("\t.personality\t", asm_out_file);
28642 output_addr_const (asm_out_file, personality);
28643 fputc ('\n', asm_out_file);
28646 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
28648 static void
28649 arm_asm_init_sections (void)
28651 exception_section = get_unnamed_section (0, output_section_asm_op,
28652 "\t.handlerdata");
28654 #endif /* ARM_UNWIND_INFO */
28656 /* Output unwind directives for the start/end of a function. */
28658 void
28659 arm_output_fn_unwind (FILE * f, bool prologue)
28661 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28662 return;
28664 if (prologue)
28665 fputs ("\t.fnstart\n", f);
28666 else
28668 /* If this function will never be unwound, then mark it as such.
28669 The same condition is used in arm_unwind_emit to suppress
28670 the frame annotations. */
28671 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28672 && (TREE_NOTHROW (current_function_decl)
28673 || crtl->all_throwers_are_sibcalls))
28674 fputs("\t.cantunwind\n", f);
28676 fputs ("\t.fnend\n", f);
28680 static bool
28681 arm_emit_tls_decoration (FILE *fp, rtx x)
28683 enum tls_reloc reloc;
28684 rtx val;
28686 val = XVECEXP (x, 0, 0);
28687 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
28689 output_addr_const (fp, val);
28691 switch (reloc)
28693 case TLS_GD32:
28694 fputs ("(tlsgd)", fp);
28695 break;
28696 case TLS_LDM32:
28697 fputs ("(tlsldm)", fp);
28698 break;
28699 case TLS_LDO32:
28700 fputs ("(tlsldo)", fp);
28701 break;
28702 case TLS_IE32:
28703 fputs ("(gottpoff)", fp);
28704 break;
28705 case TLS_LE32:
28706 fputs ("(tpoff)", fp);
28707 break;
28708 case TLS_DESCSEQ:
28709 fputs ("(tlsdesc)", fp);
28710 break;
28711 default:
28712 gcc_unreachable ();
28715 switch (reloc)
28717 case TLS_GD32:
28718 case TLS_LDM32:
28719 case TLS_IE32:
28720 case TLS_DESCSEQ:
28721 fputs (" + (. - ", fp);
28722 output_addr_const (fp, XVECEXP (x, 0, 2));
28723 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
28724 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
28725 output_addr_const (fp, XVECEXP (x, 0, 3));
28726 fputc (')', fp);
28727 break;
28728 default:
28729 break;
28732 return TRUE;
28735 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
28737 static void
28738 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
28740 gcc_assert (size == 4);
28741 fputs ("\t.word\t", file);
28742 output_addr_const (file, x);
28743 fputs ("(tlsldo)", file);
28746 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
28748 static bool
28749 arm_output_addr_const_extra (FILE *fp, rtx x)
28751 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
28752 return arm_emit_tls_decoration (fp, x);
28753 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
28755 char label[256];
28756 int labelno = INTVAL (XVECEXP (x, 0, 0));
28758 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
28759 assemble_name_raw (fp, label);
28761 return TRUE;
28763 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
28765 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
28766 if (GOT_PCREL)
28767 fputs ("+.", fp);
28768 fputs ("-(", fp);
28769 output_addr_const (fp, XVECEXP (x, 0, 0));
28770 fputc (')', fp);
28771 return TRUE;
28773 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
28775 output_addr_const (fp, XVECEXP (x, 0, 0));
28776 if (GOT_PCREL)
28777 fputs ("+.", fp);
28778 fputs ("-(", fp);
28779 output_addr_const (fp, XVECEXP (x, 0, 1));
28780 fputc (')', fp);
28781 return TRUE;
28783 else if (GET_CODE (x) == CONST_VECTOR)
28784 return arm_emit_vector_const (fp, x);
28786 return FALSE;
28789 /* Output assembly for a shift instruction.
28790 SET_FLAGS determines how the instruction modifies the condition codes.
28791 0 - Do not set condition codes.
28792 1 - Set condition codes.
28793 2 - Use smallest instruction. */
28794 const char *
28795 arm_output_shift(rtx * operands, int set_flags)
28797 char pattern[100];
28798 static const char flag_chars[3] = {'?', '.', '!'};
28799 const char *shift;
28800 HOST_WIDE_INT val;
28801 char c;
28803 c = flag_chars[set_flags];
28804 if (TARGET_UNIFIED_ASM)
28806 shift = shift_op(operands[3], &val);
28807 if (shift)
28809 if (val != -1)
28810 operands[2] = GEN_INT(val);
28811 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
28813 else
28814 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
28816 else
28817 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
28818 output_asm_insn (pattern, operands);
28819 return "";
28822 /* Output assembly for a WMMX immediate shift instruction. */
28823 const char *
28824 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
28826 int shift = INTVAL (operands[2]);
28827 char templ[50];
28828 enum machine_mode opmode = GET_MODE (operands[0]);
28830 gcc_assert (shift >= 0);
28832 /* Handle shift values that are out of range for the register versions:
28833 > 63 for the D qualifier, > 31 for W, or > 15 for H. */
28834 if (((opmode == V4HImode) && (shift > 15))
28835 || ((opmode == V2SImode) && (shift > 31))
28836 || ((opmode == DImode) && (shift > 63)))
28838 if (wror_or_wsra)
28840 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28841 output_asm_insn (templ, operands);
28842 if (opmode == DImode)
28844 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
28845 output_asm_insn (templ, operands);
28848 else
28850 /* The destination register will contain all zeros. */
28851 sprintf (templ, "wzero\t%%0");
28852 output_asm_insn (templ, operands);
28854 return "";
28857 if ((opmode == DImode) && (shift > 32))
28859 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28860 output_asm_insn (templ, operands);
28861 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
28862 output_asm_insn (templ, operands);
28864 else
28866 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
28867 output_asm_insn (templ, operands);
28869 return "";
28872 /* Output assembly for a WMMX tinsr instruction. */
28873 const char *
28874 arm_output_iwmmxt_tinsr (rtx *operands)
28876 int mask = INTVAL (operands[3]);
28877 int i;
28878 char templ[50];
28879 int units = mode_nunits[GET_MODE (operands[0])];
28880 gcc_assert ((mask & (mask - 1)) == 0);
28881 for (i = 0; i < units; ++i)
28883 if ((mask & 0x01) == 1)
28885 break;
28887 mask >>= 1;
28889 gcc_assert (i < units);
28891 switch (GET_MODE (operands[0]))
28893 case V8QImode:
28894 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
28895 break;
28896 case V4HImode:
28897 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
28898 break;
28899 case V2SImode:
28900 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
28901 break;
28902 default:
28903 gcc_unreachable ();
28904 break;
28906 output_asm_insn (templ, operands);
28908 return "";
28911 /* Output a Thumb-1 casesi dispatch sequence. */
28912 const char *
28913 thumb1_output_casesi (rtx *operands)
28915 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
28917 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28919 switch (GET_MODE(diff_vec))
28921 case QImode:
28922 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28923 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28924 case HImode:
28925 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28926 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28927 case SImode:
28928 return "bl\t%___gnu_thumb1_case_si";
28929 default:
28930 gcc_unreachable ();
28934 /* Output a Thumb-2 casesi instruction. */
28935 const char *
28936 thumb2_output_casesi (rtx *operands)
28938 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
28940 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28942 output_asm_insn ("cmp\t%0, %1", operands);
28943 output_asm_insn ("bhi\t%l3", operands);
28944 switch (GET_MODE(diff_vec))
28946 case QImode:
28947 return "tbb\t[%|pc, %0]";
28948 case HImode:
28949 return "tbh\t[%|pc, %0, lsl #1]";
28950 case SImode:
28951 if (flag_pic)
28953 output_asm_insn ("adr\t%4, %l2", operands);
28954 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28955 output_asm_insn ("add\t%4, %4, %5", operands);
28956 return "bx\t%4";
28958 else
28960 output_asm_insn ("adr\t%4, %l2", operands);
28961 return "ldr\t%|pc, [%4, %0, lsl #2]";
28963 default:
28964 gcc_unreachable ();
28968 /* Most ARM cores are single issue, but some newer ones can dual issue.
28969 The scheduler descriptions rely on this being correct. */
28970 static int
28971 arm_issue_rate (void)
28973 switch (arm_tune)
28975 case cortexa15:
28976 return 3;
28978 case cortexr4:
28979 case cortexr4f:
28980 case cortexr5:
28981 case genericv7a:
28982 case cortexa5:
28983 case cortexa7:
28984 case cortexa8:
28985 case cortexa9:
28986 case cortexa53:
28987 case fa726te:
28988 case marvell_pj4:
28989 return 2;
28991 default:
28992 return 1;
28996 /* A table and a function to perform ARM-specific name mangling for
28997 NEON vector types in order to conform to the AAPCS (see "Procedure
28998 Call Standard for the ARM Architecture", Appendix A). To qualify
28999 for emission with the mangled names defined in that document, a
29000 vector type must not only be of the correct mode but also be
29001 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29002 typedef struct
29004 enum machine_mode mode;
29005 const char *element_type_name;
29006 const char *aapcs_name;
29007 } arm_mangle_map_entry;
29009 static arm_mangle_map_entry arm_mangle_map[] = {
29010 /* 64-bit containerized types. */
29011 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29012 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29013 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29014 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29015 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29016 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29017 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29018 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29019 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29020 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29021 /* 128-bit containerized types. */
29022 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29023 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29024 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29025 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29026 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29027 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29028 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29029 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29030 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29031 { VOIDmode, NULL, NULL }
29034 const char *
29035 arm_mangle_type (const_tree type)
29037 arm_mangle_map_entry *pos = arm_mangle_map;
29039 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29040 has to be mangled as if it is in the "std" namespace. */
29041 if (TARGET_AAPCS_BASED
29042 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29043 return "St9__va_list";
29045 /* Half-precision float. */
29046 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29047 return "Dh";
29049 if (TREE_CODE (type) != VECTOR_TYPE)
29050 return NULL;
29052 /* Check the mode of the vector type, and the name of the vector
29053 element type, against the table. */
29054 while (pos->mode != VOIDmode)
29056 tree elt_type = TREE_TYPE (type);
29058 if (pos->mode == TYPE_MODE (type)
29059 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29060 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29061 pos->element_type_name))
29062 return pos->aapcs_name;
29064 pos++;
29067 /* Use the default mangling for unrecognized (possibly user-defined)
29068 vector types. */
29069 return NULL;
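/* Worked example (added for clarity; not part of the original source): an
   argument of type int8x8_t is a V8QImode vector of __builtin_neon_qi
   elements, so the table above maps it to "15__simd64_int8_t"; a C++
   function such as "void f (int8x8_t)" would therefore mangle roughly as
   "_Z1f15__simd64_int8_t".  */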
29072 /* Order of allocation of core registers for Thumb: this allocation is
29073 written over the corresponding initial entries of the array
29074 initialized with REG_ALLOC_ORDER. We allocate all low registers
29075 first. Saving and restoring a low register is usually cheaper than
29076 using a call-clobbered high register. */
29078 static const int thumb_core_reg_alloc_order[] =
29080 3, 2, 1, 0, 4, 5, 6, 7,
29081 14, 12, 8, 9, 10, 11
29084 /* Adjust register allocation order when compiling for Thumb. */
29086 void
29087 arm_order_regs_for_local_alloc (void)
29089 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29090 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29091 if (TARGET_THUMB)
29092 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29093 sizeof (thumb_core_reg_alloc_order));
29096 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29098 bool
29099 arm_frame_pointer_required (void)
29101 return (cfun->has_nonlocal_label
29102 || SUBTARGET_FRAME_POINTER_REQUIRED
29103 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29106 /* Only thumb1 can't support conditional execution, so return true if
29107 the target is not thumb1. */
29108 static bool
29109 arm_have_conditional_execution (void)
29111 return !TARGET_THUMB1;
29114 tree
29115 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29117 enum machine_mode in_mode, out_mode;
29118 int in_n, out_n;
29120 if (TREE_CODE (type_out) != VECTOR_TYPE
29121 || TREE_CODE (type_in) != VECTOR_TYPE
29122 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
29123 return NULL_TREE;
29125 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29126 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29127 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29128 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29130 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29131 decl of the vectorized builtin for the appropriate vector mode.
29132 NULL_TREE is returned if no such builtin is available. */
29133 #undef ARM_CHECK_BUILTIN_MODE
29134 #define ARM_CHECK_BUILTIN_MODE(C) \
29135 (out_mode == SFmode && out_n == C \
29136 && in_mode == SFmode && in_n == C)
29138 #undef ARM_FIND_VRINT_VARIANT
29139 #define ARM_FIND_VRINT_VARIANT(N) \
29140 (ARM_CHECK_BUILTIN_MODE (2) \
29141 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29142 : (ARM_CHECK_BUILTIN_MODE (4) \
29143 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29144 : NULL_TREE))
29146 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29148 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29149 switch (fn)
29151 case BUILT_IN_FLOORF:
29152 return ARM_FIND_VRINT_VARIANT (vrintm);
29153 case BUILT_IN_CEILF:
29154 return ARM_FIND_VRINT_VARIANT (vrintp);
29155 case BUILT_IN_TRUNCF:
29156 return ARM_FIND_VRINT_VARIANT (vrintz);
29157 case BUILT_IN_ROUNDF:
29158 return ARM_FIND_VRINT_VARIANT (vrinta);
29159 default:
29160 return NULL_TREE;
29163 return NULL_TREE;
29165 #undef ARM_CHECK_BUILTIN_MODE
29166 #undef ARM_FIND_VRINT_VARIANT
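/* Example (added for clarity; not part of the original source): with NEON
   and ARMv8 FP enabled (e.g. -mfpu=neon-fp-armv8) plus
   -funsafe-math-optimizations, a call to floorf in a loop vectorized over
   V4SFmode is mapped by the code above to the ARM_BUILTIN_NEON_vrintmv4sf
   builtin; ceilf, truncf and roundf map to the vrintp, vrintz and vrinta
   variants respectively.  */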
29168 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29169 static HOST_WIDE_INT
29170 arm_vector_alignment (const_tree type)
29172 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29174 if (TARGET_AAPCS_BASED)
29175 align = MIN (align, 64);
29177 return align;
29180 static unsigned int
29181 arm_autovectorize_vector_sizes (void)
29183 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
29186 static bool
29187 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29189 /* Vectors which aren't in packed structures will not be less aligned than
29190 the natural alignment of their element type, so this is safe. */
29191 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
29192 return !is_packed;
29194 return default_builtin_vector_alignment_reachable (type, is_packed);
29197 static bool
29198 arm_builtin_support_vector_misalignment (enum machine_mode mode,
29199 const_tree type, int misalignment,
29200 bool is_packed)
29202 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
29204 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29206 if (is_packed)
29207 return align == 1;
29209 /* If the misalignment is unknown, we should be able to handle the access
29210 so long as it is not to a member of a packed data structure. */
29211 if (misalignment == -1)
29212 return true;
29214 /* Return true if the misalignment is a multiple of the natural alignment
29215 of the vector's element type. This is probably always going to be
29216 true in practice, since we've already established that this isn't a
29217 packed access. */
29218 return ((misalignment % align) == 0);
29221 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29222 is_packed);
29225 static void
29226 arm_conditional_register_usage (void)
29228 int regno;
29230 if (TARGET_THUMB1 && optimize_size)
29232 /* When optimizing for size on Thumb-1, it's better not
29233 to use the HI regs, because of the overhead of
29234 stacking them. */
29235 for (regno = FIRST_HI_REGNUM;
29236 regno <= LAST_HI_REGNUM; ++regno)
29237 fixed_regs[regno] = call_used_regs[regno] = 1;
29240 /* The link register can be clobbered by any branch insn,
29241 but we have no way to track that at present, so mark
29242 it as unavailable. */
29243 if (TARGET_THUMB1)
29244 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29246 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
29248 /* VFPv3 registers are disabled when earlier VFP
29249 versions are selected due to the definition of
29250 LAST_VFP_REGNUM. */
29251 for (regno = FIRST_VFP_REGNUM;
29252 regno <= LAST_VFP_REGNUM; ++ regno)
29254 fixed_regs[regno] = 0;
29255 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29256 || regno >= FIRST_VFP_REGNUM + 32;
29260 if (TARGET_REALLY_IWMMXT)
29262 regno = FIRST_IWMMXT_GR_REGNUM;
29263 /* The 2002/10/09 revision of the XScale ABI has wCG0
29264 and wCG1 as call-preserved registers. The 2002/11/21
29265 revision changed this so that all wCG registers are
29266 scratch registers. */
29267 for (regno = FIRST_IWMMXT_GR_REGNUM;
29268 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29269 fixed_regs[regno] = 0;
29270 /* The XScale ABI has wR0 - wR9 as scratch registers,
29271 the rest as call-preserved registers. */
29272 for (regno = FIRST_IWMMXT_REGNUM;
29273 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29275 fixed_regs[regno] = 0;
29276 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29280 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29282 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29283 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29285 else if (TARGET_APCS_STACK)
29287 fixed_regs[10] = 1;
29288 call_used_regs[10] = 1;
29290 /* -mcaller-super-interworking reserves r11 for calls to
29291 _interwork_r11_call_via_rN(). Making the register global
29292 is an easy way of ensuring that it remains valid for all
29293 calls. */
29294 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29295 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29297 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29298 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29299 if (TARGET_CALLER_INTERWORKING)
29300 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29302 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29305 static reg_class_t
29306 arm_preferred_rename_class (reg_class_t rclass)
29308 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29309 using GENERAL_REGS. During the register rename pass we therefore prefer
29310 LO_REGS, so that code size can be reduced. */
29311 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
29312 return LO_REGS;
29313 else
29314 return NO_REGS;
29317 /* Compute the attribute "length" of insn "*push_multi".
29318 So this function MUST be kept in sync with that insn pattern. */
29320 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
29322 int i, regno, hi_reg;
29323 int num_saves = XVECLEN (parallel_op, 0);
29325 /* ARM mode. */
29326 if (TARGET_ARM)
29327 return 4;
29328 /* Thumb1 mode. */
29329 if (TARGET_THUMB1)
29330 return 2;
29332 /* Thumb2 mode. */
29333 regno = REGNO (first_op);
29334 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29335 for (i = 1; i < num_saves && !hi_reg; i++)
29337 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
29338 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29341 if (!hi_reg)
29342 return 2;
29343 return 4;
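/* Example (added for clarity; not part of the original source): in Thumb-2,
   "push {r4, r5, lr}" uses only low registers plus LR and so fits the
   16-bit encoding (length 2), whereas a push that includes a high register
   other than LR, e.g. "push {r4, r8}", needs the 32-bit encoding
   (length 4), which is what the loop above checks for.  */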
29346 /* Compute the number of instructions emitted by output_move_double. */
29348 arm_count_output_move_double_insns (rtx *operands)
29350 int count;
29351 rtx ops[2];
29352 /* output_move_double may modify the operands array, so call it
29353 here on a copy of the array. */
29354 ops[0] = operands[0];
29355 ops[1] = operands[1];
29356 output_move_double (ops, false, &count);
29357 return count;
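/* Descriptive comment added for clarity (the original function is
   uncommented): return the number of fraction bits when OPERAND is a
   CONST_DOUBLE whose value is the exact reciprocal of a power of two,
   e.g. 0.125 -> 3, and 0 otherwise; presumably used when matching VFP
   fixed-point conversion patterns.  */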
29361 vfp3_const_double_for_fract_bits (rtx operand)
29363 REAL_VALUE_TYPE r0;
29365 if (!CONST_DOUBLE_P (operand))
29366 return 0;
29368 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29369 if (exact_real_inverse (DFmode, &r0))
29371 if (exact_real_truncate (DFmode, &r0))
29373 HOST_WIDE_INT value = real_to_integer (&r0);
29374 value = value & 0xffffffff;
29375 if ((value != 0) && ( (value & (value - 1)) == 0))
29376 return int_log2 (value);
29379 return 0;
29382 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29384 static void
29385 arm_pre_atomic_barrier (enum memmodel model)
29387 if (need_atomic_barrier_p (model, true))
29388 emit_insn (gen_memory_barrier ());
29391 static void
29392 arm_post_atomic_barrier (enum memmodel model)
29394 if (need_atomic_barrier_p (model, false))
29395 emit_insn (gen_memory_barrier ());
29398 /* Emit the load-exclusive and store-exclusive instructions.
29399 Use acquire and release versions if necessary. */
29401 static void
29402 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
29404 rtx (*gen) (rtx, rtx);
29406 if (acq)
29408 switch (mode)
29410 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
29411 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
29412 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
29413 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
29414 default:
29415 gcc_unreachable ();
29418 else
29420 switch (mode)
29422 case QImode: gen = gen_arm_load_exclusiveqi; break;
29423 case HImode: gen = gen_arm_load_exclusivehi; break;
29424 case SImode: gen = gen_arm_load_exclusivesi; break;
29425 case DImode: gen = gen_arm_load_exclusivedi; break;
29426 default:
29427 gcc_unreachable ();
29431 emit_insn (gen (rval, mem));
29434 static void
29435 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
29436 rtx mem, bool rel)
29438 rtx (*gen) (rtx, rtx, rtx);
29440 if (rel)
29442 switch (mode)
29444 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
29445 case HImode: gen = gen_arm_store_release_exclusivehi; break;
29446 case SImode: gen = gen_arm_store_release_exclusivesi; break;
29447 case DImode: gen = gen_arm_store_release_exclusivedi; break;
29448 default:
29449 gcc_unreachable ();
29452 else
29454 switch (mode)
29456 case QImode: gen = gen_arm_store_exclusiveqi; break;
29457 case HImode: gen = gen_arm_store_exclusivehi; break;
29458 case SImode: gen = gen_arm_store_exclusivesi; break;
29459 case DImode: gen = gen_arm_store_exclusivedi; break;
29460 default:
29461 gcc_unreachable ();
29465 emit_insn (gen (bval, rval, mem));
29468 /* Mark the previous jump instruction as unlikely. */
29470 static void
29471 emit_unlikely_jump (rtx insn)
29473 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
29475 insn = emit_jump_insn (insn);
29476 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
29479 /* Expand a compare and swap pattern. */
29481 void
29482 arm_expand_compare_and_swap (rtx operands[])
29484 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
29485 enum machine_mode mode;
29486 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
29488 bval = operands[0];
29489 rval = operands[1];
29490 mem = operands[2];
29491 oldval = operands[3];
29492 newval = operands[4];
29493 is_weak = operands[5];
29494 mod_s = operands[6];
29495 mod_f = operands[7];
29496 mode = GET_MODE (mem);
29498 /* Normally the succ memory model must be stronger than fail, but in the
29499 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29500 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29502 if (TARGET_HAVE_LDACQ
29503 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
29504 && INTVAL (mod_s) == MEMMODEL_RELEASE)
29505 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
29507 switch (mode)
29509 case QImode:
29510 case HImode:
29511 /* For narrow modes, we're going to perform the comparison in SImode,
29512 so do the zero-extension now. */
29513 rval = gen_reg_rtx (SImode);
29514 oldval = convert_modes (SImode, mode, oldval, true);
29515 /* FALLTHRU */
29517 case SImode:
29518 /* Force the value into a register if needed. We waited until after
29519 the zero-extension above to do this properly. */
29520 if (!arm_add_operand (oldval, SImode))
29521 oldval = force_reg (SImode, oldval);
29522 break;
29524 case DImode:
29525 if (!cmpdi_operand (oldval, mode))
29526 oldval = force_reg (mode, oldval);
29527 break;
29529 default:
29530 gcc_unreachable ();
29533 switch (mode)
29535 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
29536 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
29537 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
29538 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
29539 default:
29540 gcc_unreachable ();
29543 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
29545 if (mode == QImode || mode == HImode)
29546 emit_move_insn (operands[1], gen_lowpart (mode, rval));
29548 /* In all cases, we arrange for success to be signaled by Z set.
29549 This arrangement allows for the boolean result to be used directly
29550 in a subsequent branch, post optimization. */
29551 x = gen_rtx_REG (CCmode, CC_REGNUM);
29552 x = gen_rtx_EQ (SImode, x, const0_rtx);
29553 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
29556 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
29557 another memory store between the load-exclusive and store-exclusive can
29558 reset the monitor from Exclusive to Open state. This means we must wait
29559 until after reload to split the pattern, lest we get a register spill in
29560 the middle of the atomic sequence. */
29562 void
29563 arm_split_compare_and_swap (rtx operands[])
29565 rtx rval, mem, oldval, newval, scratch;
29566 enum machine_mode mode;
29567 enum memmodel mod_s, mod_f;
29568 bool is_weak;
29569 rtx label1, label2, x, cond;
29571 rval = operands[0];
29572 mem = operands[1];
29573 oldval = operands[2];
29574 newval = operands[3];
29575 is_weak = (operands[4] != const0_rtx);
29576 mod_s = (enum memmodel) INTVAL (operands[5]);
29577 mod_f = (enum memmodel) INTVAL (operands[6]);
29578 scratch = operands[7];
29579 mode = GET_MODE (mem);
29581 bool use_acquire = TARGET_HAVE_LDACQ
29582 && !(mod_s == MEMMODEL_RELAXED
29583 || mod_s == MEMMODEL_CONSUME
29584 || mod_s == MEMMODEL_RELEASE);
29586 bool use_release = TARGET_HAVE_LDACQ
29587 && !(mod_s == MEMMODEL_RELAXED
29588 || mod_s == MEMMODEL_CONSUME
29589 || mod_s == MEMMODEL_ACQUIRE);
29591 /* Checks whether a barrier is needed and emits one accordingly. */
29592 if (!(use_acquire || use_release))
29593 arm_pre_atomic_barrier (mod_s);
29595 label1 = NULL_RTX;
29596 if (!is_weak)
29598 label1 = gen_label_rtx ();
29599 emit_label (label1);
29601 label2 = gen_label_rtx ();
29603 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
29605 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
29606 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29607 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29608 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
29609 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
29611 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
29613 /* Weak or strong, we want EQ to be true for success, so that we
29614 match the flags that we got from the compare above. */
29615 cond = gen_rtx_REG (CCmode, CC_REGNUM);
29616 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
29617 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
29619 if (!is_weak)
29621 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29622 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
29623 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
29624 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
29627 if (mod_f != MEMMODEL_RELAXED)
29628 emit_label (label2);
29630 /* Checks whether a barrier is needed and emits one accordingly. */
29631 if (!(use_acquire || use_release))
29632 arm_post_atomic_barrier (mod_s);
29634 if (mod_f == MEMMODEL_RELAXED)
29635 emit_label (label2);
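/* Illustrative sketch (added for clarity; not part of the original source):
   for a strong SImode compare-and-swap without acquire/release instructions
   the split above produces, roughly:

	1:	ldrex	rval, [mem]
		cmp	rval, oldval
		bne	2f
		strex	scratch, newval, [mem]
		cmp	scratch, #0
		bne	1b
	2:

   with memory barriers emitted before and after the sequence as required by
   the memory model.  Register names are placeholders for the operands of
   the same names above.  */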
29638 void
29639 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
29640 rtx value, rtx model_rtx, rtx cond)
29642 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
29643 enum machine_mode mode = GET_MODE (mem);
29644 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
29645 rtx label, x;
29647 bool use_acquire = TARGET_HAVE_LDACQ
29648 && !(model == MEMMODEL_RELAXED
29649 || model == MEMMODEL_CONSUME
29650 || model == MEMMODEL_RELEASE);
29652 bool use_release = TARGET_HAVE_LDACQ
29653 && !(model == MEMMODEL_RELAXED
29654 || model == MEMMODEL_CONSUME
29655 || model == MEMMODEL_ACQUIRE);
29657 /* Checks whether a barrier is needed and emits one accordingly. */
29658 if (!(use_acquire || use_release))
29659 arm_pre_atomic_barrier (model);
29661 label = gen_label_rtx ();
29662 emit_label (label);
29664 if (new_out)
29665 new_out = gen_lowpart (wmode, new_out);
29666 if (old_out)
29667 old_out = gen_lowpart (wmode, old_out);
29668 else
29669 old_out = new_out;
29670 value = simplify_gen_subreg (wmode, value, mode, 0);
29672 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
29674 switch (code)
29676 case SET:
29677 new_out = value;
29678 break;
29680 case NOT:
29681 x = gen_rtx_AND (wmode, old_out, value);
29682 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
29683 x = gen_rtx_NOT (wmode, new_out);
29684 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
29685 break;
29687 case MINUS:
29688 if (CONST_INT_P (value))
29690 value = GEN_INT (-INTVAL (value));
29691 code = PLUS;
29693 /* FALLTHRU */
29695 case PLUS:
29696 if (mode == DImode)
29698 /* DImode plus/minus need to clobber flags. */
29699 /* The adddi3 and subdi3 patterns are incorrectly written so that
29700 they require matching operands, even when we could easily support
29701 three operands. Thankfully, this can be fixed up post-splitting,
29702 as the individual add+adc patterns do accept three operands and
29703 post-reload cprop can make these moves go away. */
29704 emit_move_insn (new_out, old_out);
29705 if (code == PLUS)
29706 x = gen_adddi3 (new_out, new_out, value);
29707 else
29708 x = gen_subdi3 (new_out, new_out, value);
29709 emit_insn (x);
29710 break;
29712 /* FALLTHRU */
29714 default:
29715 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
29716 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
29717 break;
29720 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
29721 use_release);
29723 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29724 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
29726 /* Checks whether a barrier is needed and emits one accordingly. */
29727 if (!(use_acquire || use_release))
29728 arm_post_atomic_barrier (model);
29731 #define MAX_VECT_LEN 16
29733 struct expand_vec_perm_d
29735 rtx target, op0, op1;
29736 unsigned char perm[MAX_VECT_LEN];
29737 enum machine_mode vmode;
29738 unsigned char nelt;
29739 bool one_vector_p;
29740 bool testing_p;
29743 /* Generate a variable permutation. */
29745 static void
29746 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
29748 enum machine_mode vmode = GET_MODE (target);
29749 bool one_vector_p = rtx_equal_p (op0, op1);
29751 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
29752 gcc_checking_assert (GET_MODE (op0) == vmode);
29753 gcc_checking_assert (GET_MODE (op1) == vmode);
29754 gcc_checking_assert (GET_MODE (sel) == vmode);
29755 gcc_checking_assert (TARGET_NEON);
29757 if (one_vector_p)
29759 if (vmode == V8QImode)
29760 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
29761 else
29762 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
29764 else
29766 rtx pair;
29768 if (vmode == V8QImode)
29770 pair = gen_reg_rtx (V16QImode);
29771 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
29772 pair = gen_lowpart (TImode, pair);
29773 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
29775 else
29777 pair = gen_reg_rtx (OImode);
29778 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
29779 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
29784 void
29785 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
29787 enum machine_mode vmode = GET_MODE (target);
29788 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
29789 bool one_vector_p = rtx_equal_p (op0, op1);
29790 rtx rmask[MAX_VECT_LEN], mask;
29792 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29793 numbering of elements for big-endian, we must reverse the order. */
29794 gcc_checking_assert (!BYTES_BIG_ENDIAN);
29796 /* The VTBL instruction does not use a modulo index, so we must take care
29797 of that ourselves. */
29798 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
29799 for (i = 0; i < nelt; ++i)
29800 rmask[i] = mask;
29801 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
29802 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
29804 arm_expand_vec_perm_1 (target, op0, op1, sel);
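/* Example (added for clarity; not part of the original source): for a
   single V8QImode input the mask built above is 7, so every selector byte
   is ANDed with 7 before the VTBL.  This matters because VTBL returns zero
   for out-of-range indices, whereas VEC_PERM_EXPR requires indices to be
   taken modulo the number of elements.  */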
29807 /* Generate or test for an insn that supports a constant permutation. */
29809 /* Recognize patterns for the VUZP insns. */
29811 static bool
29812 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
29814 unsigned int i, odd, mask, nelt = d->nelt;
29815 rtx out0, out1, in0, in1, x;
29816 rtx (*gen)(rtx, rtx, rtx, rtx);
29818 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29819 return false;
29821 /* Note that these are little-endian tests. Adjust for big-endian later. */
29822 if (d->perm[0] == 0)
29823 odd = 0;
29824 else if (d->perm[0] == 1)
29825 odd = 1;
29826 else
29827 return false;
29828 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29830 for (i = 0; i < nelt; i++)
29832 unsigned elt = (i * 2 + odd) & mask;
29833 if (d->perm[i] != elt)
29834 return false;
29837 /* Success! */
29838 if (d->testing_p)
29839 return true;
29841 switch (d->vmode)
29843 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29844 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
29845 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
29846 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
29847 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
29848 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
29849 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
29850 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
29851 default:
29852 gcc_unreachable ();
29855 in0 = d->op0;
29856 in1 = d->op1;
29857 if (BYTES_BIG_ENDIAN)
29859 x = in0, in0 = in1, in1 = x;
29860 odd = !odd;
29863 out0 = d->target;
29864 out1 = gen_reg_rtx (d->vmode);
29865 if (odd)
29866 x = out0, out0 = out1, out1 = x;
29868 emit_insn (gen (out0, in0, in1, out1));
29869 return true;
29872 /* Recognize patterns for the VZIP insns. */
29874 static bool
29875 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29877 unsigned int i, high, mask, nelt = d->nelt;
29878 rtx out0, out1, in0, in1, x;
29879 rtx (*gen)(rtx, rtx, rtx, rtx);
29881 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29882 return false;
29884 /* Note that these are little-endian tests. Adjust for big-endian later. */
29885 high = nelt / 2;
29886 if (d->perm[0] == high)
29888 else if (d->perm[0] == 0)
29889 high = 0;
29890 else
29891 return false;
29892 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29894 for (i = 0; i < nelt / 2; i++)
29896 unsigned elt = (i + high) & mask;
29897 if (d->perm[i * 2] != elt)
29898 return false;
29899 elt = (elt + nelt) & mask;
29900 if (d->perm[i * 2 + 1] != elt)
29901 return false;
29904 /* Success! */
29905 if (d->testing_p)
29906 return true;
29908 switch (d->vmode)
29910 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29911 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
29912 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
29913 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
29914 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
29915 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
29916 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
29917 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
29918 default:
29919 gcc_unreachable ();
29922 in0 = d->op0;
29923 in1 = d->op1;
29924 if (BYTES_BIG_ENDIAN)
29926 x = in0, in0 = in1, in1 = x;
29927 high = !high;
29930 out0 = d->target;
29931 out1 = gen_reg_rtx (d->vmode);
29932 if (high)
29933 x = out0, out0 = out1, out1 = x;
29935 emit_insn (gen (out0, in0, in1, out1));
29936 return true;
29939 /* Recognize patterns for the VREV insns. */
29941 static bool
29942 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29944 unsigned int i, j, diff, nelt = d->nelt;
29945 rtx (*gen)(rtx, rtx, rtx);
29947 if (!d->one_vector_p)
29948 return false;
29950 diff = d->perm[0];
29951 switch (diff)
29953 case 7:
29954 switch (d->vmode)
29956 case V16QImode: gen = gen_neon_vrev64v16qi; break;
29957 case V8QImode: gen = gen_neon_vrev64v8qi; break;
29958 default:
29959 return false;
29961 break;
29962 case 3:
29963 switch (d->vmode)
29965 case V16QImode: gen = gen_neon_vrev32v16qi; break;
29966 case V8QImode: gen = gen_neon_vrev32v8qi; break;
29967 case V8HImode: gen = gen_neon_vrev64v8hi; break;
29968 case V4HImode: gen = gen_neon_vrev64v4hi; break;
29969 default:
29970 return false;
29972 break;
29973 case 1:
29974 switch (d->vmode)
29976 case V16QImode: gen = gen_neon_vrev16v16qi; break;
29977 case V8QImode: gen = gen_neon_vrev16v8qi; break;
29978 case V8HImode: gen = gen_neon_vrev32v8hi; break;
29979 case V4HImode: gen = gen_neon_vrev32v4hi; break;
29980 case V4SImode: gen = gen_neon_vrev64v4si; break;
29981 case V2SImode: gen = gen_neon_vrev64v2si; break;
29982 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
29983 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
29984 default:
29985 return false;
29987 break;
29988 default:
29989 return false;
29992 for (i = 0; i < nelt ; i += diff + 1)
29993 for (j = 0; j <= diff; j += 1)
29995 /* This is guaranteed to be true as the value of diff
29996 is 7, 3 or 1 and we should have enough elements in the
29997 queue to generate this. Getting a vector mask with a
29998 value of diff other than these values implies that
29999 something is wrong by the time we get here. */
30000 gcc_assert (i + j < nelt);
30001 if (d->perm[i + j] != i + diff - j)
30002 return false;
30005 /* Success! */
30006 if (d->testing_p)
30007 return true;
30009 /* ??? The third operand is an artifact of the builtin infrastructure
30010 and is ignored by the actual instruction. */
30011 emit_insn (gen (d->target, d->op0, const0_rtx));
30012 return true;
30015 /* Recognize patterns for the VTRN insns. */
30017 static bool
30018 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30020 unsigned int i, odd, mask, nelt = d->nelt;
30021 rtx out0, out1, in0, in1, x;
30022 rtx (*gen)(rtx, rtx, rtx, rtx);
30024 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30025 return false;
30027 /* Note that these are little-endian tests. Adjust for big-endian later. */
30028 if (d->perm[0] == 0)
30029 odd = 0;
30030 else if (d->perm[0] == 1)
30031 odd = 1;
30032 else
30033 return false;
30034 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30036 for (i = 0; i < nelt; i += 2)
30038 if (d->perm[i] != i + odd)
30039 return false;
30040 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30041 return false;
30044 /* Success! */
30045 if (d->testing_p)
30046 return true;
30048 switch (d->vmode)
30050 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30051 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30052 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30053 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30054 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30055 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30056 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30057 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30058 default:
30059 gcc_unreachable ();
30062 in0 = d->op0;
30063 in1 = d->op1;
30064 if (BYTES_BIG_ENDIAN)
30066 x = in0, in0 = in1, in1 = x;
30067 odd = !odd;
30070 out0 = d->target;
30071 out1 = gen_reg_rtx (d->vmode);
30072 if (odd)
30073 x = out0, out0 = out1, out1 = x;
30075 emit_insn (gen (out0, in0, in1, out1));
30076 return true;
30079 /* Recognize patterns for the VEXT insns. */
30081 static bool
30082 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30084 unsigned int i, nelt = d->nelt;
30085 rtx (*gen) (rtx, rtx, rtx, rtx);
30086 rtx offset;
30088 unsigned int location;
30090 unsigned int next = d->perm[0] + 1;
30092 /* TODO: Handle GCC's numbering of elements for big-endian. */
30093 if (BYTES_BIG_ENDIAN)
30094 return false;
30096 /* Check if the extracted indexes are increasing by one. */
30097 for (i = 1; i < nelt; next++, i++)
30099 /* If we hit the most significant element of the 2nd vector in
30100 the previous iteration, no need to test further. */
30101 if (next == 2 * nelt)
30102 return false;
30104 /* If we are operating on only one vector: it could be a
30105 rotation. If there are only two elements of size < 64, let
30106 arm_evpc_neon_vrev catch it. */
30107 if (d->one_vector_p && (next == nelt))
30109 if ((nelt == 2) && (d->vmode != V2DImode))
30110 return false;
30111 else
30112 next = 0;
30115 if (d->perm[i] != next)
30116 return false;
30119 location = d->perm[0];
30121 switch (d->vmode)
30123 case V16QImode: gen = gen_neon_vextv16qi; break;
30124 case V8QImode: gen = gen_neon_vextv8qi; break;
30125 case V4HImode: gen = gen_neon_vextv4hi; break;
30126 case V8HImode: gen = gen_neon_vextv8hi; break;
30127 case V2SImode: gen = gen_neon_vextv2si; break;
30128 case V4SImode: gen = gen_neon_vextv4si; break;
30129 case V2SFmode: gen = gen_neon_vextv2sf; break;
30130 case V4SFmode: gen = gen_neon_vextv4sf; break;
30131 case V2DImode: gen = gen_neon_vextv2di; break;
30132 default:
30133 return false;
30136 /* Success! */
30137 if (d->testing_p)
30138 return true;
30140 offset = GEN_INT (location);
30141 emit_insn (gen (d->target, d->op0, d->op1, offset));
30142 return true;
30145 /* The NEON VTBL instruction is a fully variable permutation that's even
30146 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30147 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30148 can do slightly better by expanding this as a constant where we don't
30149 have to apply a mask. */
30151 static bool
30152 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30154 rtx rperm[MAX_VECT_LEN], sel;
30155 enum machine_mode vmode = d->vmode;
30156 unsigned int i, nelt = d->nelt;
30158 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30159 numbering of elements for big-endian, we must reverse the order. */
30160 if (BYTES_BIG_ENDIAN)
30161 return false;
30163 if (d->testing_p)
30164 return true;
30166 /* Generic code will try constant permutation twice: once with the
30167 original mode and again with the elements lowered to QImode.
30168 So wait and don't do the selector expansion ourselves. */
30169 if (vmode != V8QImode && vmode != V16QImode)
30170 return false;
30172 for (i = 0; i < nelt; ++i)
30173 rperm[i] = GEN_INT (d->perm[i]);
30174 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30175 sel = force_reg (vmode, sel);
30177 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30178 return true;
30181 static bool
30182 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30184 /* Check if the input mask matches vext before reordering the
30185 operands. */
30186 if (TARGET_NEON)
30187 if (arm_evpc_neon_vext (d))
30188 return true;
30190 /* The pattern matching functions above are written to look for a small
30191 number to begin the sequence (0, 1, N/2). If we begin with an index
30192 from the second operand, we can swap the operands. */
30193 if (d->perm[0] >= d->nelt)
30195 unsigned i, nelt = d->nelt;
30196 rtx x;
30198 for (i = 0; i < nelt; ++i)
30199 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
30201 x = d->op0;
30202 d->op0 = d->op1;
30203 d->op1 = x;
30206 if (TARGET_NEON)
30208 if (arm_evpc_neon_vuzp (d))
30209 return true;
30210 if (arm_evpc_neon_vzip (d))
30211 return true;
30212 if (arm_evpc_neon_vrev (d))
30213 return true;
30214 if (arm_evpc_neon_vtrn (d))
30215 return true;
30216 return arm_evpc_neon_vtbl (d);
30218 return false;
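/* Worked selector examples for the recognizers above (added for clarity;
   not part of the original source), all little-endian and two-operand
   unless noted:
     VUZP, V8QImode:   { 0, 2, 4, 6, 8, 10, 12, 14 }   (even lanes)
     VZIP, V8QImode:   { 0, 8, 1, 9, 2, 10, 3, 11 }    (interleave low halves)
     VREV64.8, V8QImode, one operand:  { 7, 6, 5, 4, 3, 2, 1, 0 }
     VTRN, V4SImode:   { 0, 4, 2, 6 }
     VEXT #3, V8QImode: { 3, 4, 5, 6, 7, 8, 9, 10 }
   Anything else falls through to the VTBL-based expansion.  */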
30221 /* Expand a vec_perm_const pattern. */
30223 bool
30224 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
30226 struct expand_vec_perm_d d;
30227 int i, nelt, which;
30229 d.target = target;
30230 d.op0 = op0;
30231 d.op1 = op1;
30233 d.vmode = GET_MODE (target);
30234 gcc_assert (VECTOR_MODE_P (d.vmode));
30235 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30236 d.testing_p = false;
30238 for (i = which = 0; i < nelt; ++i)
30240 rtx e = XVECEXP (sel, 0, i);
30241 int ei = INTVAL (e) & (2 * nelt - 1);
30242 which |= (ei < nelt ? 1 : 2);
30243 d.perm[i] = ei;
30246 switch (which)
30248 default:
30249 gcc_unreachable();
30251 case 3:
30252 d.one_vector_p = false;
30253 if (!rtx_equal_p (op0, op1))
30254 break;
30256 /* The elements of PERM do not suggest that only the first operand
30257 is used, but both operands are identical. Allow easier matching
30258 of the permutation by folding the permutation into the single
30259 input vector. */
30260 /* FALLTHRU */
30261 case 2:
30262 for (i = 0; i < nelt; ++i)
30263 d.perm[i] &= nelt - 1;
30264 d.op0 = op1;
30265 d.one_vector_p = true;
30266 break;
30268 case 1:
30269 d.op1 = op0;
30270 d.one_vector_p = true;
30271 break;
30274 return arm_expand_vec_perm_const_1 (&d);
30277 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30279 static bool
30280 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
30281 const unsigned char *sel)
30283 struct expand_vec_perm_d d;
30284 unsigned int i, nelt, which;
30285 bool ret;
30287 d.vmode = vmode;
30288 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30289 d.testing_p = true;
30290 memcpy (d.perm, sel, nelt);
30292 /* Categorize the set of elements in the selector. */
30293 for (i = which = 0; i < nelt; ++i)
30295 unsigned char e = d.perm[i];
30296 gcc_assert (e < 2 * nelt);
30297 which |= (e < nelt ? 1 : 2);
30300 /* For all elements from second vector, fold the elements to first. */
30301 if (which == 2)
30302 for (i = 0; i < nelt; ++i)
30303 d.perm[i] -= nelt;
30305 /* Check whether the mask can be applied to the vector type. */
30306 d.one_vector_p = (which != 3);
30308 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30309 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
30310 if (!d.one_vector_p)
30311 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
30313 start_sequence ();
30314 ret = arm_expand_vec_perm_const_1 (&d);
30315 end_sequence ();
30317 return ret;
30320 bool
30321 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
30323 /* If we are soft float and either LDRD is available or the mode
30324 fits in a single word, then all auto increment forms are ok. */
30325 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
30326 return true;
30328 switch (code)
30330 /* Post increment and Pre Decrement are supported for all
30331 instruction forms except for vector forms. */
30332 case ARM_POST_INC:
30333 case ARM_PRE_DEC:
30334 if (VECTOR_MODE_P (mode))
30336 if (code != ARM_PRE_DEC)
30337 return true;
30338 else
30339 return false;
30342 return true;
30344 case ARM_POST_DEC:
30345 case ARM_PRE_INC:
30346 /* Without LDRD, and with a mode larger than
30347 word size, there is no point in auto-incrementing
30348 because ldm and stm do not have these forms. */
30349 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
30350 return false;
30352 /* Vector and floating point modes do not support
30353 these auto increment forms. */
30354 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
30355 return false;
30357 return true;
30359 default:
30360 return false;
30364 return false;
30367 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
30368 on ARM, since we know that shifts by negative amounts are no-ops.
30369 Additionally, the default expansion code is not available or suitable
30370 for post-reload insn splits (this can occur when the register allocator
30371 chooses not to do a shift in NEON).
30373 This function is used in both initial expand and post-reload splits, and
30374 handles all kinds of 64-bit shifts.
30376 Input requirements:
30377 - It is safe for the input and output to be the same register, but
30378 early-clobber rules apply for the shift amount and scratch registers.
30379 - Shift by register requires both scratch registers. In all other cases
30380 the scratch registers may be NULL.
30381 - Ashiftrt by a register also clobbers the CC register. */
30382 void
30383 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
30384 rtx amount, rtx scratch1, rtx scratch2)
30386 rtx out_high = gen_highpart (SImode, out);
30387 rtx out_low = gen_lowpart (SImode, out);
30388 rtx in_high = gen_highpart (SImode, in);
30389 rtx in_low = gen_lowpart (SImode, in);
30391 /* Terminology:
30392 in = the register pair containing the input value.
30393 out = the destination register pair.
30394 up = the high- or low-part of each pair.
30395 down = the opposite part to "up".
30396 In a shift, we can consider bits to shift from "up"-stream to
30397 "down"-stream, so in a left-shift "up" is the low-part and "down"
30398 is the high-part of each register pair. */
30400 rtx out_up = code == ASHIFT ? out_low : out_high;
30401 rtx out_down = code == ASHIFT ? out_high : out_low;
30402 rtx in_up = code == ASHIFT ? in_low : in_high;
30403 rtx in_down = code == ASHIFT ? in_high : in_low;
30405 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
30406 gcc_assert (out
30407 && (REG_P (out) || GET_CODE (out) == SUBREG)
30408 && GET_MODE (out) == DImode);
30409 gcc_assert (in
30410 && (REG_P (in) || GET_CODE (in) == SUBREG)
30411 && GET_MODE (in) == DImode);
30412 gcc_assert (amount
30413 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
30414 && GET_MODE (amount) == SImode)
30415 || CONST_INT_P (amount)));
30416 gcc_assert (scratch1 == NULL
30417 || (GET_CODE (scratch1) == SCRATCH)
30418 || (GET_MODE (scratch1) == SImode
30419 && REG_P (scratch1)));
30420 gcc_assert (scratch2 == NULL
30421 || (GET_CODE (scratch2) == SCRATCH)
30422 || (GET_MODE (scratch2) == SImode
30423 && REG_P (scratch2)));
30424 gcc_assert (!REG_P (out) || !REG_P (amount)
30425 || !HARD_REGISTER_P (out)
30426 || (REGNO (out) != REGNO (amount)
30427 && REGNO (out) + 1 != REGNO (amount)));
30429 /* Macros to make following code more readable. */
30430 #define SUB_32(DEST,SRC) \
30431 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30432 #define RSB_32(DEST,SRC) \
30433 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30434 #define SUB_S_32(DEST,SRC) \
30435 gen_addsi3_compare0 ((DEST), (SRC), \
30436 GEN_INT (-32))
30437 #define SET(DEST,SRC) \
30438 gen_rtx_SET (SImode, (DEST), (SRC))
30439 #define SHIFT(CODE,SRC,AMOUNT) \
30440 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30441 #define LSHIFT(CODE,SRC,AMOUNT) \
30442 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30443 SImode, (SRC), (AMOUNT))
30444 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30445 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30446 SImode, (SRC), (AMOUNT))
30447 #define ORR(A,B) \
30448 gen_rtx_IOR (SImode, (A), (B))
30449 #define BRANCH(COND,LABEL) \
30450 gen_arm_cond_branch ((LABEL), \
30451 gen_rtx_ ## COND (CCmode, cc_reg, \
30452 const0_rtx), \
30453 cc_reg)
30455 /* Shifts by register and shifts by constant are handled separately. */
30456 if (CONST_INT_P (amount))
30458 /* We have a shift-by-constant. */
30460 /* First, handle out-of-range shift amounts.
30461 In both cases we try to match the result that an ARM instruction in a
30462 shift-by-register would give. This helps reduce execution
30463 differences between optimization levels, but it won't stop other
30464 parts of the compiler doing different things. This is "undefined
30465 behaviour", in any case. */
30466 if (INTVAL (amount) <= 0)
30467 emit_insn (gen_movdi (out, in));
30468 else if (INTVAL (amount) >= 64)
30470 if (code == ASHIFTRT)
30472 rtx const31_rtx = GEN_INT (31);
30473 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
30474 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
30476 else
30477 emit_insn (gen_movdi (out, const0_rtx));
30480 /* Now handle valid shifts. */
30481 else if (INTVAL (amount) < 32)
30483 /* Shifts by a constant less than 32. */
30484 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
30486 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30487 emit_insn (SET (out_down,
30488 ORR (REV_LSHIFT (code, in_up, reverse_amount),
30489 out_down)));
30490 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30492 else
30494 /* Shifts by a constant greater than 31. */
30495 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
30497 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
30498 if (code == ASHIFTRT)
30499 emit_insn (gen_ashrsi3 (out_up, in_up,
30500 GEN_INT (31)));
30501 else
30502 emit_insn (SET (out_up, const0_rtx));
30505 else
30507 /* We have a shift-by-register. */
30508 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
30510 /* This alternative requires the scratch registers. */
30511 gcc_assert (scratch1 && REG_P (scratch1));
30512 gcc_assert (scratch2 && REG_P (scratch2));
30514 /* We will need the values "amount-32" and "32-amount" later.
30515 Swapping them around now allows the later code to be more general. */
30516 switch (code)
30518 case ASHIFT:
30519 emit_insn (SUB_32 (scratch1, amount));
30520 emit_insn (RSB_32 (scratch2, amount));
30521 break;
30522 case ASHIFTRT:
30523 emit_insn (RSB_32 (scratch1, amount));
30524 /* Also set CC = amount > 32. */
30525 emit_insn (SUB_S_32 (scratch2, amount));
30526 break;
30527 case LSHIFTRT:
30528 emit_insn (RSB_32 (scratch1, amount));
30529 emit_insn (SUB_32 (scratch2, amount));
30530 break;
30531 default:
30532 gcc_unreachable ();
30535 /* Emit code like this:
30537 arithmetic-left:
30538 out_down = in_down << amount;
30539 out_down = (in_up << (amount - 32)) | out_down;
30540 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30541 out_up = in_up << amount;
30543 arithmetic-right:
30544 out_down = in_down >> amount;
30545 out_down = (in_up << (32 - amount)) | out_down;
30546 if (amount < 32)
30547 out_down = ((signed)in_up >> (amount - 32)) | out_down;
30548 out_up = in_up >> amount;
30550 logical-right:
30551 out_down = in_down >> amount;
30552 out_down = (in_up << (32 - amount)) | out_down;
30553 if (amount < 32)
30554 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
30555 out_up = in_up >> amount;
30557 The ARM and Thumb2 variants are the same but implemented slightly
30558 differently. If this were only called during expand we could just
30559 use the Thumb2 case and let combine do the right thing, but this
30560 can also be called from post-reload splitters. */
30562 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30564 if (!TARGET_THUMB2)
30566 /* Emit code for ARM mode. */
30567 emit_insn (SET (out_down,
30568 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
30569 if (code == ASHIFTRT)
30571 rtx done_label = gen_label_rtx ();
30572 emit_jump_insn (BRANCH (LT, done_label));
30573 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
30574 out_down)));
30575 emit_label (done_label);
30577 else
30578 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
30579 out_down)));
30581 else
30583 /* Emit code for Thumb2 mode.
30584 Thumb2 can't do shift and or in one insn. */
30585 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
30586 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
30588 if (code == ASHIFTRT)
30590 rtx done_label = gen_label_rtx ();
30591 emit_jump_insn (BRANCH (LT, done_label));
30592 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
30593 emit_insn (SET (out_down, ORR (out_down, scratch2)));
30594 emit_label (done_label);
30596 else
30598 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
30599 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
30603 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30606 #undef SUB_32
30607 #undef RSB_32
30608 #undef SUB_S_32
30609 #undef SET
30610 #undef SHIFT
30611 #undef LSHIFT
30612 #undef REV_LSHIFT
30613 #undef ORR
30614 #undef BRANCH
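/* Illustrative, standalone sketch (added for clarity; not part of the
   original source): the shift-by-constant path above, for a left shift by
   0 < n < 32, computes the two halves of the result exactly as this plain C
   helper does.  The function name is hypothetical.  */

#include <stdint.h>

static uint64_t
shl64_via_halves (uint32_t in_low, uint32_t in_high, unsigned int n)
{
  /* "down" is the high half for a left shift: it receives the bits shifted
     out of the low half.  */
  uint32_t out_high = (in_high << n) | (in_low >> (32 - n));
  uint32_t out_low = in_low << n;	/* The "up" half.  */
  return ((uint64_t) out_high << 32) | out_low;
}

/* e.g. shl64_via_halves (0x80000001u, 0, 1) == 0x100000002ULL.  */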
30618 /* Return true if *COMPARISON is a valid comparison operation, and
30619 force the operands into a form that the comparison insns can accept. */
30620 bool
30621 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
30623 enum rtx_code code = GET_CODE (*comparison);
30624 int code_int;
30625 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
30626 ? GET_MODE (*op2) : GET_MODE (*op1);
30628 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
30630 if (code == UNEQ || code == LTGT)
30631 return false;
30633 code_int = (int)code;
30634 arm_canonicalize_comparison (&code_int, op1, op2, 0);
30635 PUT_CODE (*comparison, (enum rtx_code)code_int);
30637 switch (mode)
30639 case SImode:
30640 if (!arm_add_operand (*op1, mode))
30641 *op1 = force_reg (mode, *op1);
30642 if (!arm_add_operand (*op2, mode))
30643 *op2 = force_reg (mode, *op2);
30644 return true;
30646 case DImode:
30647 if (!cmpdi_operand (*op1, mode))
30648 *op1 = force_reg (mode, *op1);
30649 if (!cmpdi_operand (*op2, mode))
30650 *op2 = force_reg (mode, *op2);
30651 return true;
30653 case SFmode:
30654 case DFmode:
30655 if (!arm_float_compare_operand (*op1, mode))
30656 *op1 = force_reg (mode, *op1);
30657 if (!arm_float_compare_operand (*op2, mode))
30658 *op2 = force_reg (mode, *op2);
30659 return true;
30660 default:
30661 break;
30664 return false;
30668 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30670 static unsigned HOST_WIDE_INT
30671 arm_asan_shadow_offset (void)
30673 return (unsigned HOST_WIDE_INT) 1 << 29;
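/* Example (added for clarity; not part of the original source): with the
   default AddressSanitizer shadow scale of 3, a shadow byte address is
   computed roughly as (addr >> 3) + (1 << 29), i.e. the offset returned
   here places the shadow region at 0x20000000.  */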
30676 #include "gt-arm.h"