official-gcc.git / gcc-4_8-branch / gcc / config / arm / arm.c
blob c3eacdd21bf6bc8cb8ec0b49ff1b514f2906209e
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2013 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "obstack.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "reload.h"
38 #include "function.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "diagnostic-core.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "ggc.h"
45 #include "except.h"
46 #include "tm_p.h"
47 #include "target.h"
48 #include "target-def.h"
49 #include "debug.h"
50 #include "langhooks.h"
51 #include "df.h"
52 #include "intl.h"
53 #include "libfuncs.h"
54 #include "params.h"
55 #include "opts.h"
56 #include "dumpfile.h"
58 /* Forward definitions of types. */
59 typedef struct minipool_node Mnode;
60 typedef struct minipool_fixup Mfix;
62 void (*arm_lang_output_object_attributes_hook)(void);
64 struct four_ints
66 int i[4];
69 /* Forward function declarations. */
70 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
71 static int arm_compute_static_chain_stack_bytes (void);
72 static arm_stack_offsets *arm_get_frame_offsets (void);
73 static void arm_add_gc_roots (void);
74 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
75 HOST_WIDE_INT, rtx, rtx, int, int);
76 static unsigned bit_count (unsigned long);
77 static int arm_address_register_rtx_p (rtx, int);
78 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
79 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
80 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
81 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
82 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
83 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
84 inline static int thumb1_index_register_rtx_p (rtx, int);
85 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
86 static int thumb_far_jump_used_p (void);
87 static bool thumb_force_lr_save (void);
88 static unsigned arm_size_return_regs (void);
89 static bool arm_assemble_integer (rtx, unsigned int, int);
90 static void arm_print_operand (FILE *, rtx, int);
91 static void arm_print_operand_address (FILE *, rtx);
92 static bool arm_print_operand_punct_valid_p (unsigned char code);
93 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
94 static arm_cc get_arm_condition_code (rtx);
95 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
96 static rtx is_jump_table (rtx);
97 static const char *output_multi_immediate (rtx *, const char *, const char *,
98 int, HOST_WIDE_INT);
99 static const char *shift_op (rtx, HOST_WIDE_INT *);
100 static struct machine_function *arm_init_machine_status (void);
101 static void thumb_exit (FILE *, int);
102 static rtx is_jump_table (rtx);
103 static HOST_WIDE_INT get_jump_table_size (rtx);
104 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
105 static Mnode *add_minipool_forward_ref (Mfix *);
106 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
107 static Mnode *add_minipool_backward_ref (Mfix *);
108 static void assign_minipool_offsets (Mfix *);
109 static void arm_print_value (FILE *, rtx);
110 static void dump_minipool (rtx);
111 static int arm_barrier_cost (rtx);
112 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
113 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
114 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
115 rtx);
116 static void arm_reorg (void);
117 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
118 static unsigned long arm_compute_save_reg0_reg12_mask (void);
119 static unsigned long arm_compute_save_reg_mask (void);
120 static unsigned long arm_isr_value (tree);
121 static unsigned long arm_compute_func_type (void);
122 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
123 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
124 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
125 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
126 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
127 #endif
128 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
129 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
130 static int arm_comp_type_attributes (const_tree, const_tree);
131 static void arm_set_default_type_attributes (tree);
132 static int arm_adjust_cost (rtx, rtx, rtx, int);
133 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
134 static int optimal_immediate_sequence (enum rtx_code code,
135 unsigned HOST_WIDE_INT val,
136 struct four_ints *return_sequence);
137 static int optimal_immediate_sequence_1 (enum rtx_code code,
138 unsigned HOST_WIDE_INT val,
139 struct four_ints *return_sequence,
140 int i);
141 static int arm_get_strip_length (int);
142 static bool arm_function_ok_for_sibcall (tree, tree);
143 static enum machine_mode arm_promote_function_mode (const_tree,
144 enum machine_mode, int *,
145 const_tree, int);
146 static bool arm_return_in_memory (const_tree, const_tree);
147 static rtx arm_function_value (const_tree, const_tree, bool);
148 static rtx arm_libcall_value_1 (enum machine_mode);
149 static rtx arm_libcall_value (enum machine_mode, const_rtx);
150 static bool arm_function_value_regno_p (const unsigned int);
151 static void arm_internal_label (FILE *, const char *, unsigned long);
152 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
153 tree);
154 static bool arm_have_conditional_execution (void);
155 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
156 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
157 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
158 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
159 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
160 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
161 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
163 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
164 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
165 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
166 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
167 static void arm_init_builtins (void);
168 static void arm_init_iwmmxt_builtins (void);
169 static rtx safe_vector_operand (rtx, enum machine_mode);
170 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
171 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
172 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
173 static tree arm_builtin_decl (unsigned, bool);
174 static void emit_constant_insn (rtx cond, rtx pattern);
175 static rtx emit_set_insn (rtx, rtx);
176 static rtx emit_multi_reg_push (unsigned long);
177 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
178 tree, bool);
179 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
180 const_tree, bool);
181 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
182 const_tree, bool);
183 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
184 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
185 const_tree);
186 static rtx aapcs_libcall_value (enum machine_mode);
187 static int aapcs_select_return_coproc (const_tree, const_tree);
189 #ifdef OBJECT_FORMAT_ELF
190 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
191 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
192 #endif
193 #ifndef ARM_PE
194 static void arm_encode_section_info (tree, rtx, int);
195 #endif
197 static void arm_file_end (void);
198 static void arm_file_start (void);
200 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
201 tree, int *, int);
202 static bool arm_pass_by_reference (cumulative_args_t,
203 enum machine_mode, const_tree, bool);
204 static bool arm_promote_prototypes (const_tree);
205 static bool arm_default_short_enums (void);
206 static bool arm_align_anon_bitfield (void);
207 static bool arm_return_in_msb (const_tree);
208 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
209 static bool arm_return_in_memory (const_tree, const_tree);
210 #if ARM_UNWIND_INFO
211 static void arm_unwind_emit (FILE *, rtx);
212 static bool arm_output_ttype (rtx);
213 static void arm_asm_emit_except_personality (rtx);
214 static void arm_asm_init_sections (void);
215 #endif
216 static rtx arm_dwarf_register_span (rtx);
218 static tree arm_cxx_guard_type (void);
219 static bool arm_cxx_guard_mask_bit (void);
220 static tree arm_get_cookie_size (tree);
221 static bool arm_cookie_has_size (void);
222 static bool arm_cxx_cdtor_returns_this (void);
223 static bool arm_cxx_key_method_may_be_inline (void);
224 static void arm_cxx_determine_class_data_visibility (tree);
225 static bool arm_cxx_class_data_always_comdat (void);
226 static bool arm_cxx_use_aeabi_atexit (void);
227 static void arm_init_libfuncs (void);
228 static tree arm_build_builtin_va_list (void);
229 static void arm_expand_builtin_va_start (tree, rtx);
230 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
231 static void arm_option_override (void);
232 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
233 static bool arm_cannot_copy_insn_p (rtx);
234 static bool arm_tls_symbol_p (rtx x);
235 static int arm_issue_rate (void);
236 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
237 static bool arm_output_addr_const_extra (FILE *, rtx);
238 static bool arm_allocate_stack_slots_for_args (void);
239 static bool arm_warn_func_return (tree);
240 static const char *arm_invalid_parameter_type (const_tree t);
241 static const char *arm_invalid_return_type (const_tree t);
242 static tree arm_promoted_type (const_tree t);
243 static tree arm_convert_to_type (tree type, tree expr);
244 static bool arm_scalar_mode_supported_p (enum machine_mode);
245 static bool arm_frame_pointer_required (void);
246 static bool arm_can_eliminate (const int, const int);
247 static void arm_asm_trampoline_template (FILE *);
248 static void arm_trampoline_init (rtx, tree, rtx);
249 static rtx arm_trampoline_adjust_address (rtx);
250 static rtx arm_pic_static_addr (rtx orig, rtx reg);
251 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
252 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
253 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
254 static bool arm_array_mode_supported_p (enum machine_mode,
255 unsigned HOST_WIDE_INT);
256 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
257 static bool arm_class_likely_spilled_p (reg_class_t);
258 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
259 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
260 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
261 const_tree type,
262 int misalignment,
263 bool is_packed);
264 static void arm_conditional_register_usage (void);
265 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
266 static unsigned int arm_autovectorize_vector_sizes (void);
267 static int arm_default_branch_cost (bool, bool);
268 static int arm_cortex_a5_branch_cost (bool, bool);
270 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
271 const unsigned char *sel);
273 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
274 tree vectype,
275 int misalign ATTRIBUTE_UNUSED);
276 static unsigned arm_add_stmt_cost (void *data, int count,
277 enum vect_cost_for_stmt kind,
278 struct _stmt_vec_info *stmt_info,
279 int misalign,
280 enum vect_cost_model_location where);
282 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
283 bool op0_preserve_value);
284 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
286 /* Table of machine attributes. */
287 static const struct attribute_spec arm_attribute_table[] =
289 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
290 affects_type_identity } */
291 /* Function calls made to this symbol must be done indirectly, because
292 it may lie outside of the 26 bit addressing range of a normal function
293 call. */
294 { "long_call", 0, 0, false, true, true, NULL, false },
295 /* Whereas these functions are always known to reside within the 26 bit
296 addressing range. */
297 { "short_call", 0, 0, false, true, true, NULL, false },
298 /* Specify the procedure call conventions for a function. */
299 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
300 false },
301 /* Interrupt Service Routines have special prologue and epilogue requirements. */
302 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
303 false },
304 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
305 false },
306 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
307 false },
308 #ifdef ARM_PE
309 /* ARM/PE has three new attributes:
310 interfacearm - ?
311 dllexport - for exporting a function/variable that will live in a dll
312 dllimport - for importing a function/variable from a dll
314 Microsoft allows multiple declspecs in one __declspec, separating
315 them with spaces. We do NOT support this. Instead, use __declspec
316 multiple times.
318 { "dllimport", 0, 0, true, false, false, NULL, false },
319 { "dllexport", 0, 0, true, false, false, NULL, false },
320 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
321 false },
322 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
323 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
324 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
325 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
326 false },
327 #endif
328 { NULL, 0, 0, false, false, false, NULL, false }
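/* A quick illustration of how the table above is used from C code --
   a sketch with made-up declarations, not anything taken from this file:

     void far_helper (void) __attribute__ ((long_call));
     void uart_irq (void)   __attribute__ ((isr ("IRQ")));
     void start (void)      __attribute__ ((naked));
     double vadd (double)   __attribute__ ((pcs ("aapcs-vfp")));

   "pcs" is the only entry that requires an argument (min_len == max_len
   == 1); arm_handle_pcs_attribute checks it.  */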
331 /* Initialize the GCC target structure. */
332 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
333 #undef TARGET_MERGE_DECL_ATTRIBUTES
334 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
335 #endif
337 #undef TARGET_LEGITIMIZE_ADDRESS
338 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
340 #undef TARGET_ATTRIBUTE_TABLE
341 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
343 #undef TARGET_ASM_FILE_START
344 #define TARGET_ASM_FILE_START arm_file_start
345 #undef TARGET_ASM_FILE_END
346 #define TARGET_ASM_FILE_END arm_file_end
348 #undef TARGET_ASM_ALIGNED_SI_OP
349 #define TARGET_ASM_ALIGNED_SI_OP NULL
350 #undef TARGET_ASM_INTEGER
351 #define TARGET_ASM_INTEGER arm_assemble_integer
353 #undef TARGET_PRINT_OPERAND
354 #define TARGET_PRINT_OPERAND arm_print_operand
355 #undef TARGET_PRINT_OPERAND_ADDRESS
356 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
357 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
358 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
360 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
361 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
363 #undef TARGET_ASM_FUNCTION_PROLOGUE
364 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
366 #undef TARGET_ASM_FUNCTION_EPILOGUE
367 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
369 #undef TARGET_OPTION_OVERRIDE
370 #define TARGET_OPTION_OVERRIDE arm_option_override
372 #undef TARGET_COMP_TYPE_ATTRIBUTES
373 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
375 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
376 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
378 #undef TARGET_SCHED_ADJUST_COST
379 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
381 #undef TARGET_SCHED_REORDER
382 #define TARGET_SCHED_REORDER arm_sched_reorder
384 #undef TARGET_REGISTER_MOVE_COST
385 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
387 #undef TARGET_MEMORY_MOVE_COST
388 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
390 #undef TARGET_ENCODE_SECTION_INFO
391 #ifdef ARM_PE
392 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
393 #else
394 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
395 #endif
397 #undef TARGET_STRIP_NAME_ENCODING
398 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
400 #undef TARGET_ASM_INTERNAL_LABEL
401 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
403 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
404 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
406 #undef TARGET_FUNCTION_VALUE
407 #define TARGET_FUNCTION_VALUE arm_function_value
409 #undef TARGET_LIBCALL_VALUE
410 #define TARGET_LIBCALL_VALUE arm_libcall_value
412 #undef TARGET_FUNCTION_VALUE_REGNO_P
413 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
415 #undef TARGET_ASM_OUTPUT_MI_THUNK
416 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
417 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
418 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
420 #undef TARGET_RTX_COSTS
421 #define TARGET_RTX_COSTS arm_rtx_costs
422 #undef TARGET_ADDRESS_COST
423 #define TARGET_ADDRESS_COST arm_address_cost
425 #undef TARGET_SHIFT_TRUNCATION_MASK
426 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
427 #undef TARGET_VECTOR_MODE_SUPPORTED_P
428 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
429 #undef TARGET_ARRAY_MODE_SUPPORTED_P
430 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
431 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
432 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
433 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
434 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
435 arm_autovectorize_vector_sizes
437 #undef TARGET_MACHINE_DEPENDENT_REORG
438 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
440 #undef TARGET_INIT_BUILTINS
441 #define TARGET_INIT_BUILTINS arm_init_builtins
442 #undef TARGET_EXPAND_BUILTIN
443 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
444 #undef TARGET_BUILTIN_DECL
445 #define TARGET_BUILTIN_DECL arm_builtin_decl
447 #undef TARGET_INIT_LIBFUNCS
448 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
450 #undef TARGET_PROMOTE_FUNCTION_MODE
451 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
452 #undef TARGET_PROMOTE_PROTOTYPES
453 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
454 #undef TARGET_PASS_BY_REFERENCE
455 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
456 #undef TARGET_ARG_PARTIAL_BYTES
457 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
458 #undef TARGET_FUNCTION_ARG
459 #define TARGET_FUNCTION_ARG arm_function_arg
460 #undef TARGET_FUNCTION_ARG_ADVANCE
461 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
462 #undef TARGET_FUNCTION_ARG_BOUNDARY
463 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
465 #undef TARGET_SETUP_INCOMING_VARARGS
466 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
468 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
469 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
471 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
472 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
473 #undef TARGET_TRAMPOLINE_INIT
474 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
475 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
476 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
478 #undef TARGET_WARN_FUNC_RETURN
479 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
481 #undef TARGET_DEFAULT_SHORT_ENUMS
482 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
484 #undef TARGET_ALIGN_ANON_BITFIELD
485 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
487 #undef TARGET_NARROW_VOLATILE_BITFIELD
488 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
490 #undef TARGET_CXX_GUARD_TYPE
491 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
493 #undef TARGET_CXX_GUARD_MASK_BIT
494 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
496 #undef TARGET_CXX_GET_COOKIE_SIZE
497 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
499 #undef TARGET_CXX_COOKIE_HAS_SIZE
500 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
502 #undef TARGET_CXX_CDTOR_RETURNS_THIS
503 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
505 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
506 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
508 #undef TARGET_CXX_USE_AEABI_ATEXIT
509 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
511 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
512 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
513 arm_cxx_determine_class_data_visibility
515 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
516 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
518 #undef TARGET_RETURN_IN_MSB
519 #define TARGET_RETURN_IN_MSB arm_return_in_msb
521 #undef TARGET_RETURN_IN_MEMORY
522 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
524 #undef TARGET_MUST_PASS_IN_STACK
525 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
527 #if ARM_UNWIND_INFO
528 #undef TARGET_ASM_UNWIND_EMIT
529 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
531 /* EABI unwinding tables use a different format for the typeinfo tables. */
532 #undef TARGET_ASM_TTYPE
533 #define TARGET_ASM_TTYPE arm_output_ttype
535 #undef TARGET_ARM_EABI_UNWINDER
536 #define TARGET_ARM_EABI_UNWINDER true
538 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
539 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
541 #undef TARGET_ASM_INIT_SECTIONS
542 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
543 #endif /* ARM_UNWIND_INFO */
545 #undef TARGET_DWARF_REGISTER_SPAN
546 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
548 #undef TARGET_CANNOT_COPY_INSN_P
549 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
551 #ifdef HAVE_AS_TLS
552 #undef TARGET_HAVE_TLS
553 #define TARGET_HAVE_TLS true
554 #endif
556 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
557 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
559 #undef TARGET_LEGITIMATE_CONSTANT_P
560 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
562 #undef TARGET_CANNOT_FORCE_CONST_MEM
563 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
565 #undef TARGET_MAX_ANCHOR_OFFSET
566 #define TARGET_MAX_ANCHOR_OFFSET 4095
568 /* The minimum is set such that the total size of the block
569 for a particular anchor is 4088 + 1 + 4095 bytes, which is
570 divisible by eight, ensuring natural spacing of anchors. */
571 #undef TARGET_MIN_ANCHOR_OFFSET
572 #define TARGET_MIN_ANCHOR_OFFSET -4088
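/* Spelling out the arithmetic behind the comment above: offsets reachable
   from one anchor run from -4088 to +4095 inclusive, so each anchor
   covers

     4088 + 1 + 4095 = 8184 bytes,  and  8184 = 8 * 1023,

   a multiple of eight, which keeps consecutive section anchors naturally
   spaced relative to one another.  */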
574 #undef TARGET_SCHED_ISSUE_RATE
575 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
577 #undef TARGET_MANGLE_TYPE
578 #define TARGET_MANGLE_TYPE arm_mangle_type
580 #undef TARGET_BUILD_BUILTIN_VA_LIST
581 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
582 #undef TARGET_EXPAND_BUILTIN_VA_START
583 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
584 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
585 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
587 #ifdef HAVE_AS_TLS
588 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
589 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
590 #endif
592 #undef TARGET_LEGITIMATE_ADDRESS_P
593 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
595 #undef TARGET_PREFERRED_RELOAD_CLASS
596 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
598 #undef TARGET_INVALID_PARAMETER_TYPE
599 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
601 #undef TARGET_INVALID_RETURN_TYPE
602 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
604 #undef TARGET_PROMOTED_TYPE
605 #define TARGET_PROMOTED_TYPE arm_promoted_type
607 #undef TARGET_CONVERT_TO_TYPE
608 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
610 #undef TARGET_SCALAR_MODE_SUPPORTED_P
611 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
613 #undef TARGET_FRAME_POINTER_REQUIRED
614 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
616 #undef TARGET_CAN_ELIMINATE
617 #define TARGET_CAN_ELIMINATE arm_can_eliminate
619 #undef TARGET_CONDITIONAL_REGISTER_USAGE
620 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
622 #undef TARGET_CLASS_LIKELY_SPILLED_P
623 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
625 #undef TARGET_VECTORIZE_BUILTINS
626 #define TARGET_VECTORIZE_BUILTINS
628 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
629 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
630 arm_builtin_vectorized_function
632 #undef TARGET_VECTOR_ALIGNMENT
633 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
635 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
636 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
637 arm_vector_alignment_reachable
639 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
640 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
641 arm_builtin_support_vector_misalignment
643 #undef TARGET_PREFERRED_RENAME_CLASS
644 #define TARGET_PREFERRED_RENAME_CLASS \
645 arm_preferred_rename_class
647 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
648 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
649 arm_vectorize_vec_perm_const_ok
651 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
652 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
653 arm_builtin_vectorization_cost
654 #undef TARGET_VECTORIZE_ADD_STMT_COST
655 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
657 #undef TARGET_CANONICALIZE_COMPARISON
658 #define TARGET_CANONICALIZE_COMPARISON \
659 arm_canonicalize_comparison
661 #undef TARGET_ASAN_SHADOW_OFFSET
662 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
664 #undef MAX_INSN_PER_IT_BLOCK
665 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
668 struct gcc_target targetm = TARGET_INITIALIZER;
670 /* Obstack for minipool constant handling. */
671 static struct obstack minipool_obstack;
672 static char * minipool_startobj;
674 /* The maximum number of insns skipped which
675 will be conditionalised if possible. */
676 static int max_insns_skipped = 5;
678 extern FILE * asm_out_file;
680 /* True if we are currently building a constant table. */
681 int making_const_table;
683 /* The processor for which instructions should be scheduled. */
684 enum processor_type arm_tune = arm_none;
686 /* The current tuning set. */
687 const struct tune_params *current_tune;
689 /* Which floating point hardware to schedule for. */
690 int arm_fpu_attr;
692 /* Which floating point hardware to use. */
693 const struct arm_fpu_desc *arm_fpu_desc;
695 /* Used for Thumb call_via trampolines. */
696 rtx thumb_call_via_label[14];
697 static int thumb_call_reg_needed;
699 /* Bit values used to identify processor capabilities. */
700 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
701 #define FL_ARCH3M (1 << 1) /* Extended multiply */
702 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
703 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
704 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
705 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
706 #define FL_THUMB (1 << 6) /* Thumb aware */
707 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
708 #define FL_STRONG (1 << 8) /* StrongARM */
709 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
710 #define FL_XSCALE (1 << 10) /* XScale */
711 /* spare (1 << 11) */
712 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
713 media instructions. */
714 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
715 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
716 Note: ARM6 & 7 derivatives only. */
717 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
718 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
719 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
720 profile. */
721 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
722 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
723 #define FL_NEON (1 << 20) /* Neon instructions. */
724 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
725 architecture. */
726 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
727 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
728 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
729 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
731 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
732 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
734 /* Flags that only affect tuning, not available instructions. */
735 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
736 | FL_CO_PROC)
738 #define FL_FOR_ARCH2 FL_NOTM
739 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
740 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
741 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
742 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
743 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
744 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
745 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
746 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
747 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
748 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
749 #define FL_FOR_ARCH6J FL_FOR_ARCH6
750 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
751 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
752 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
753 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
754 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
755 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
756 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
757 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
758 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
759 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
760 #define FL_FOR_ARCH8A (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
761 | FL_ARM_DIV | FL_NOTM)
763 /* The bits in this mask specify which
764 instructions we are allowed to generate. */
765 static unsigned long insn_flags = 0;
767 /* The bits in this mask specify which instruction scheduling options should
768 be used. */
769 static unsigned long tune_flags = 0;
771 /* The highest ARM architecture version supported by the
772 target. */
773 enum base_architecture arm_base_arch = BASE_ARCH_0;
775 /* The following are used in the arm.md file as equivalents to bits
776 in the above two flag variables. */
778 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
779 int arm_arch3m = 0;
781 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
782 int arm_arch4 = 0;
784 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
785 int arm_arch4t = 0;
787 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
788 int arm_arch5 = 0;
790 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
791 int arm_arch5e = 0;
793 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
794 int arm_arch6 = 0;
796 /* Nonzero if this chip supports the ARM 6K extensions. */
797 int arm_arch6k = 0;
799 /* Nonzero if instructions present in ARMv6-M can be used. */
800 int arm_arch6m = 0;
802 /* Nonzero if this chip supports the ARM 7 extensions. */
803 int arm_arch7 = 0;
805 /* Nonzero if instructions not present in the 'M' profile can be used. */
806 int arm_arch_notm = 0;
808 /* Nonzero if instructions present in ARMv7E-M can be used. */
809 int arm_arch7em = 0;
811 /* Nonzero if instructions present in ARMv8 can be used. */
812 int arm_arch8 = 0;
814 /* Nonzero if this chip can benefit from load scheduling. */
815 int arm_ld_sched = 0;
817 /* Nonzero if this chip is a StrongARM. */
818 int arm_tune_strongarm = 0;
820 /* Nonzero if this chip supports Intel Wireless MMX technology. */
821 int arm_arch_iwmmxt = 0;
823 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
824 int arm_arch_iwmmxt2 = 0;
826 /* Nonzero if this chip is an XScale. */
827 int arm_arch_xscale = 0;
829 /* Nonzero if tuning for XScale */
830 int arm_tune_xscale = 0;
832 /* Nonzero if we want to tune for stores that access the write-buffer.
833 This typically means an ARM6 or ARM7 with MMU or MPU. */
834 int arm_tune_wbuf = 0;
836 /* Nonzero if tuning for Cortex-A9. */
837 int arm_tune_cortex_a9 = 0;
839 /* Nonzero if generating Thumb instructions. */
840 int thumb_code = 0;
842 /* Nonzero if generating Thumb-1 instructions. */
843 int thumb1_code = 0;
845 /* Nonzero if we should define __THUMB_INTERWORK__ in the
846 preprocessor.
847 XXX This is a bit of a hack; it's intended to help work around
848 problems in GLD which doesn't understand that armv5t code is
849 interworking clean. */
850 int arm_cpp_interwork = 0;
852 /* Nonzero if chip supports Thumb 2. */
853 int arm_arch_thumb2;
855 /* Nonzero if chip supports integer division instruction. */
856 int arm_arch_arm_hwdiv;
857 int arm_arch_thumb_hwdiv;
859 /* Nonzero if we should use Neon to handle 64-bit operations rather
860 than core registers. */
861 int prefer_neon_for_64bits = 0;
863 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
864 we must report the mode of the memory reference from
865 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
866 enum machine_mode output_memory_reference_mode;
868 /* The register number to be used for the PIC offset register. */
869 unsigned arm_pic_register = INVALID_REGNUM;
871 /* Set to 1 after arm_reorg has started. Reset at the start of
872 the next function. */
873 static int after_arm_reorg = 0;
875 enum arm_pcs arm_pcs_default;
877 /* For an explanation of these variables, see final_prescan_insn below. */
878 int arm_ccfsm_state;
879 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
880 enum arm_cond_code arm_current_cc;
882 rtx arm_target_insn;
883 int arm_target_label;
884 /* The number of conditionally executed insns, including the current insn. */
885 int arm_condexec_count = 0;
886 /* A bitmask specifying the patterns for the IT block.
887 Zero means do not output an IT block before this insn. */
888 int arm_condexec_mask = 0;
889 /* The number of bits used in arm_condexec_mask. */
890 int arm_condexec_masklen = 0;
892 /* Nonzero if chip supports the ARMv8 CRC instructions. */
893 int arm_arch_crc = 0;
895 /* The condition codes of the ARM, and the inverse function. */
896 static const char * const arm_condition_codes[] =
898 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
899 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
902 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
903 int arm_regs_in_sequence[] =
905 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
908 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
909 #define streq(string1, string2) (strcmp (string1, string2) == 0)
911 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
912 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
913 | (1 << PIC_OFFSET_TABLE_REGNUM)))
915 /* Initialization code. */
917 struct processors
919 const char *const name;
920 enum processor_type core;
921 const char *arch;
922 enum base_architecture base_arch;
923 const unsigned long flags;
924 const struct tune_params *const tune;
928 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
929 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
930 prefetch_slots, \
931 l1_size, \
932 l1_line_size
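/* These two macros simply supply the three prefetch-related fields of
   struct tune_params in one go, e.g.

     ARM_PREFETCH_NOT_BENEFICIAL         =>  0, -1, -1
     ARM_PREFETCH_BENEFICIAL (4,32,32)   =>  4, 32, 32

   as used by arm_cortex_a9_tune below: the number of simultaneous
   prefetch slots, then the L1 cache size and L1 line size (presumably in
   the same units as the --param l1-cache-size and l1-cache-line-size
   knobs they feed).  */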
934 /* arm generic vectorizer costs. */
935 static const
936 struct cpu_vec_costs arm_default_vec_cost = {
937 1, /* scalar_stmt_cost. */
938 1, /* scalar_load_cost. */
939 1, /* scalar_store_cost. */
940 1, /* vec_stmt_cost. */
941 1, /* vec_to_scalar_cost. */
942 1, /* scalar_to_vec_cost. */
943 1, /* vec_align_load_cost. */
944 1, /* vec_unalign_load_cost. */
945 1, /* vec_unalign_store_cost. */
946 1, /* vec_store_cost. */
947 3, /* cond_taken_branch_cost. */
948 1, /* cond_not_taken_branch_cost. */
951 const struct tune_params arm_slowmul_tune =
953 arm_slowmul_rtx_costs,
954 NULL,
955 3, /* Constant limit. */
956 5, /* Max cond insns. */
957 ARM_PREFETCH_NOT_BENEFICIAL,
958 true, /* Prefer constant pool. */
959 arm_default_branch_cost,
960 false, /* Prefer LDRD/STRD. */
961 {true, true}, /* Prefer non short circuit. */
962 &arm_default_vec_cost, /* Vectorizer costs. */
963 false /* Prefer Neon for 64-bits bitops. */
966 const struct tune_params arm_fastmul_tune =
968 arm_fastmul_rtx_costs,
969 NULL,
970 1, /* Constant limit. */
971 5, /* Max cond insns. */
972 ARM_PREFETCH_NOT_BENEFICIAL,
973 true, /* Prefer constant pool. */
974 arm_default_branch_cost,
975 false, /* Prefer LDRD/STRD. */
976 {true, true}, /* Prefer non short circuit. */
977 &arm_default_vec_cost, /* Vectorizer costs. */
978 false /* Prefer Neon for 64-bits bitops. */
981 /* StrongARM has early execution of branches, so a sequence that is worth
982 skipping is shorter. Set max_insns_skipped to a lower value. */
984 const struct tune_params arm_strongarm_tune =
986 arm_fastmul_rtx_costs,
987 NULL,
988 1, /* Constant limit. */
989 3, /* Max cond insns. */
990 ARM_PREFETCH_NOT_BENEFICIAL,
991 true, /* Prefer constant pool. */
992 arm_default_branch_cost,
993 false, /* Prefer LDRD/STRD. */
994 {true, true}, /* Prefer non short circuit. */
995 &arm_default_vec_cost, /* Vectorizer costs. */
996 false /* Prefer Neon for 64-bits bitops. */
999 const struct tune_params arm_xscale_tune =
1001 arm_xscale_rtx_costs,
1002 xscale_sched_adjust_cost,
1003 2, /* Constant limit. */
1004 3, /* Max cond insns. */
1005 ARM_PREFETCH_NOT_BENEFICIAL,
1006 true, /* Prefer constant pool. */
1007 arm_default_branch_cost,
1008 false, /* Prefer LDRD/STRD. */
1009 {true, true}, /* Prefer non short circuit. */
1010 &arm_default_vec_cost, /* Vectorizer costs. */
1011 false /* Prefer Neon for 64-bits bitops. */
1014 const struct tune_params arm_9e_tune =
1016 arm_9e_rtx_costs,
1017 NULL,
1018 1, /* Constant limit. */
1019 5, /* Max cond insns. */
1020 ARM_PREFETCH_NOT_BENEFICIAL,
1021 true, /* Prefer constant pool. */
1022 arm_default_branch_cost,
1023 false, /* Prefer LDRD/STRD. */
1024 {true, true}, /* Prefer non short circuit. */
1025 &arm_default_vec_cost, /* Vectorizer costs. */
1026 false /* Prefer Neon for 64-bits bitops. */
1029 const struct tune_params arm_v6t2_tune =
1031 arm_9e_rtx_costs,
1032 NULL,
1033 1, /* Constant limit. */
1034 5, /* Max cond insns. */
1035 ARM_PREFETCH_NOT_BENEFICIAL,
1036 false, /* Prefer constant pool. */
1037 arm_default_branch_cost,
1038 false, /* Prefer LDRD/STRD. */
1039 {true, true}, /* Prefer non short circuit. */
1040 &arm_default_vec_cost, /* Vectorizer costs. */
1041 false /* Prefer Neon for 64-bits bitops. */
1044 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1045 const struct tune_params arm_cortex_tune =
1047 arm_9e_rtx_costs,
1048 NULL,
1049 1, /* Constant limit. */
1050 5, /* Max cond insns. */
1051 ARM_PREFETCH_NOT_BENEFICIAL,
1052 false, /* Prefer constant pool. */
1053 arm_default_branch_cost,
1054 false, /* Prefer LDRD/STRD. */
1055 {true, true}, /* Prefer non short circuit. */
1056 &arm_default_vec_cost, /* Vectorizer costs. */
1057 false /* Prefer Neon for 64-bits bitops. */
1060 const struct tune_params arm_cortex_a15_tune =
1062 arm_9e_rtx_costs,
1063 NULL,
1064 1, /* Constant limit. */
1065 2, /* Max cond insns. */
1066 ARM_PREFETCH_NOT_BENEFICIAL,
1067 false, /* Prefer constant pool. */
1068 arm_default_branch_cost,
1069 true, /* Prefer LDRD/STRD. */
1070 {true, true}, /* Prefer non short circuit. */
1071 &arm_default_vec_cost, /* Vectorizer costs. */
1072 false /* Prefer Neon for 64-bits bitops. */
1075 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1076 less appealing. Set max_insns_skipped to a low value. */
1078 const struct tune_params arm_cortex_a5_tune =
1080 arm_9e_rtx_costs,
1081 NULL,
1082 1, /* Constant limit. */
1083 1, /* Max cond insns. */
1084 ARM_PREFETCH_NOT_BENEFICIAL,
1085 false, /* Prefer constant pool. */
1086 arm_cortex_a5_branch_cost,
1087 false, /* Prefer LDRD/STRD. */
1088 {false, false}, /* Prefer non short circuit. */
1089 &arm_default_vec_cost, /* Vectorizer costs. */
1090 false /* Prefer Neon for 64-bits bitops. */
1093 const struct tune_params arm_cortex_a9_tune =
1095 arm_9e_rtx_costs,
1096 cortex_a9_sched_adjust_cost,
1097 1, /* Constant limit. */
1098 5, /* Max cond insns. */
1099 ARM_PREFETCH_BENEFICIAL(4,32,32),
1100 false, /* Prefer constant pool. */
1101 arm_default_branch_cost,
1102 false, /* Prefer LDRD/STRD. */
1103 {true, true}, /* Prefer non short circuit. */
1104 &arm_default_vec_cost, /* Vectorizer costs. */
1105 false /* Prefer Neon for 64-bits bitops. */
1108 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1109 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1110 const struct tune_params arm_v6m_tune =
1112 arm_9e_rtx_costs,
1113 NULL,
1114 1, /* Constant limit. */
1115 5, /* Max cond insns. */
1116 ARM_PREFETCH_NOT_BENEFICIAL,
1117 false, /* Prefer constant pool. */
1118 arm_default_branch_cost,
1119 false, /* Prefer LDRD/STRD. */
1120 {false, false}, /* Prefer non short circuit. */
1121 &arm_default_vec_cost, /* Vectorizer costs. */
1122 false /* Prefer Neon for 64-bits bitops. */
1125 const struct tune_params arm_fa726te_tune =
1127 arm_9e_rtx_costs,
1128 fa726te_sched_adjust_cost,
1129 1, /* Constant limit. */
1130 5, /* Max cond insns. */
1131 ARM_PREFETCH_NOT_BENEFICIAL,
1132 true, /* Prefer constant pool. */
1133 arm_default_branch_cost,
1134 false, /* Prefer LDRD/STRD. */
1135 {true, true}, /* Prefer non short circuit. */
1136 &arm_default_vec_cost, /* Vectorizer costs. */
1137 false /* Prefer Neon for 64-bits bitops. */
1141 /* Not all of these give usefully different compilation alternatives,
1142 but there is no simple way of generalizing them. */
1143 static const struct processors all_cores[] =
1145 /* ARM Cores */
1146 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1147 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1148 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1149 #include "arm-cores.def"
1150 #undef ARM_CORE
1151 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
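/* Each ARM_CORE line in arm-cores.def expands through the macro above
   into one table entry.  As a sketch, a hypothetical entry

     ARM_CORE("cortex-a9", cortexa9, 7A, FL_LDSCHED, cortex_a9)

   would expand to

     {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},

   i.e. the architecture name is stringized, the matching FL_FOR_ARCHxx
   flags are OR-ed in, and COSTS picks one of the arm_*_tune structures
   defined earlier in this file.  */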
1154 static const struct processors all_architectures[] =
1156 /* ARM Architectures */
1157 /* We don't specify tuning costs here as it will be figured out
1158 from the core. */
1160 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1161 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1162 #include "arm-arches.def"
1163 #undef ARM_ARCH
1164 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1168 /* These are populated as commandline arguments are processed, or NULL
1169 if not specified. */
1170 static const struct processors *arm_selected_arch;
1171 static const struct processors *arm_selected_cpu;
1172 static const struct processors *arm_selected_tune;
1174 /* The name of the preprocessor macro to define for this architecture. */
1176 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1178 /* Available values for -mfpu=. */
1180 static const struct arm_fpu_desc all_fpus[] =
1182 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1183 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1184 #include "arm-fpus.def"
1185 #undef ARM_FPU
1189 /* Supported TLS relocations. */
1191 enum tls_reloc {
1192 TLS_GD32,
1193 TLS_LDM32,
1194 TLS_LDO32,
1195 TLS_IE32,
1196 TLS_LE32,
1197 TLS_DESCSEQ /* GNU scheme */
1200 /* The maximum number of insns to be used when loading a constant. */
1201 inline static int
1202 arm_constant_limit (bool size_p)
1204 return size_p ? 1 : current_tune->constant_limit;
1207 /* Emit an insn that's a simple single-set. Both the operands must be known
1208 to be valid. */
1209 inline static rtx
1210 emit_set_insn (rtx x, rtx y)
1212 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1215 /* Return the number of bits set in VALUE. */
1216 static unsigned
1217 bit_count (unsigned long value)
1219 unsigned long count = 0;
1221 while (value)
1223 count++;
1224 value &= value - 1; /* Clear the least-significant set bit. */
1227 return count;
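/* The loop above is Kernighan's population-count idiom: "value &= value - 1"
   clears the lowest set bit, so the body runs once per set bit rather
   than once per bit position.  A worked example:

     bit_count (0xb4UL):  0xb4 -> 0xb0 -> 0xa0 -> 0x80 -> 0x00

   four iterations, returning 4 (0xb4 is 10110100 in binary).  */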
1230 typedef struct
1232 enum machine_mode mode;
1233 const char *name;
1234 } arm_fixed_mode_set;
1236 /* A small helper for setting up fixed-point libfuncs. */
1238 static void
1239 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1240 const char *funcname, const char *modename,
1241 int num_suffix)
1243 char buffer[50];
1245 if (num_suffix == 0)
1246 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1247 else
1248 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1250 set_optab_libfunc (optable, mode, buffer);
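/* A sketch of the names this helper produces -- it simply glues the
   pieces into a libgcc-style name before registering it:

     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);
       -> set_optab_libfunc (add_optab, QQmode, "__gnu_addqq3")

     arm_set_fixed_optab_libfunc (neg_optab, SAmode, "neg", "sa", 2);
       -> set_optab_libfunc (neg_optab, SAmode, "__gnu_negsa2")

   matching the calls made from arm_init_libfuncs further down.  */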
1253 static void
1254 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1255 enum machine_mode from, const char *funcname,
1256 const char *toname, const char *fromname)
1258 char buffer[50];
1259 const char *maybe_suffix_2 = "";
1261 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1262 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1263 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1264 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1265 maybe_suffix_2 = "2";
1267 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1268 maybe_suffix_2);
1270 set_conv_libfunc (optable, to, from, buffer);
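/* With the "2" suffix rule above, a conversion between two fixed-point
   modes of matching signedness and fract-ness keeps the suffix, while a
   fixed/float conversion does not.  For example:

     arm_set_fixed_conv_libfunc (fract_optab, HQmode, QQmode,
                                 "fract", "hq", "qq");
       -> "__gnu_fractqqhq2"

     arm_set_fixed_conv_libfunc (fract_optab, SFmode, SQmode,
                                 "fract", "sf", "sq");
       -> "__gnu_fractsqsf"

   Note that the "from" name is printed before the "to" name, following
   fixed-bit.h's naming of the conversion helpers.  */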
1273 /* Set up library functions unique to ARM. */
1275 static void
1276 arm_init_libfuncs (void)
1278 /* For Linux, we have access to kernel support for atomic operations. */
1279 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1280 init_sync_libfuncs (2 * UNITS_PER_WORD);
1282 /* There are no special library functions unless we are using the
1283 ARM BPABI. */
1284 if (!TARGET_BPABI)
1285 return;
1287 /* The functions below are described in Section 4 of the "Run-Time
1288 ABI for the ARM architecture", Version 1.0. */
1290 /* Double-precision floating-point arithmetic. Table 2. */
1291 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1292 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1293 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1294 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1295 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1297 /* Double-precision comparisons. Table 3. */
1298 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1299 set_optab_libfunc (ne_optab, DFmode, NULL);
1300 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1301 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1302 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1303 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1304 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1306 /* Single-precision floating-point arithmetic. Table 4. */
1307 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1308 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1309 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1310 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1311 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1313 /* Single-precision comparisons. Table 5. */
1314 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1315 set_optab_libfunc (ne_optab, SFmode, NULL);
1316 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1317 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1318 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1319 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1320 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1322 /* Floating-point to integer conversions. Table 6. */
1323 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1324 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1325 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1326 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1327 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1328 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1329 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1330 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1332 /* Conversions between floating types. Table 7. */
1333 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1334 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1336 /* Integer to floating-point conversions. Table 8. */
1337 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1338 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1339 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1340 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1341 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1342 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1343 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1344 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1346 /* Long long. Table 9. */
1347 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1348 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1349 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1350 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1351 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1352 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1353 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1354 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1356 /* Integer (32/32->32) division. \S 4.3.1. */
1357 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1358 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1360 /* The divmod functions are designed so that they can be used for
1361 plain division, even though they return both the quotient and the
1362 remainder. The quotient is returned in the usual location (i.e.,
1363 r0 for SImode, {r0, r1} for DImode), just as would be expected
1364 for an ordinary division routine. Because the AAPCS calling
1365 conventions specify that all of { r0, r1, r2, r3 } are
1366 call-clobbered registers, there is no need to tell the compiler
1367 explicitly that those registers are clobbered by these
1368 routines. */
1369 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1370 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1372 /* For SImode division the ABI provides div-without-mod routines,
1373 which are faster. */
1374 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1375 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1377 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1378 divmod libcalls instead. */
1379 set_optab_libfunc (smod_optab, DImode, NULL);
1380 set_optab_libfunc (umod_optab, DImode, NULL);
1381 set_optab_libfunc (smod_optab, SImode, NULL);
1382 set_optab_libfunc (umod_optab, SImode, NULL);
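/* A note on the ABI convention relied on here (taken from the run-time
   ABI rather than this file): __aeabi_idivmod and __aeabi_uidivmod return
   the quotient in r0 and the remainder in r1, so one helper serves both
   "/" and "%".  For

     int q = a / b;
     int r = a % b;

   the division can go through sdiv_optab ("__aeabi_idiv") while the
   modulus goes through sdivmod_optab ("__aeabi_idivmod"), taking the
   remainder from r1 -- which is why the smod/umod optabs are cleared to
   NULL just above.  */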
1384 /* Half-precision float operations. The compiler handles all operations
1385 with NULL libfuncs by converting to SFmode. */
1386 switch (arm_fp16_format)
1388 case ARM_FP16_FORMAT_IEEE:
1389 case ARM_FP16_FORMAT_ALTERNATIVE:
1391 /* Conversions. */
1392 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1393 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1394 ? "__gnu_f2h_ieee"
1395 : "__gnu_f2h_alternative"));
1396 set_conv_libfunc (sext_optab, SFmode, HFmode,
1397 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1398 ? "__gnu_h2f_ieee"
1399 : "__gnu_h2f_alternative"));
1401 /* Arithmetic. */
1402 set_optab_libfunc (add_optab, HFmode, NULL);
1403 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1404 set_optab_libfunc (smul_optab, HFmode, NULL);
1405 set_optab_libfunc (neg_optab, HFmode, NULL);
1406 set_optab_libfunc (sub_optab, HFmode, NULL);
1408 /* Comparisons. */
1409 set_optab_libfunc (eq_optab, HFmode, NULL);
1410 set_optab_libfunc (ne_optab, HFmode, NULL);
1411 set_optab_libfunc (lt_optab, HFmode, NULL);
1412 set_optab_libfunc (le_optab, HFmode, NULL);
1413 set_optab_libfunc (ge_optab, HFmode, NULL);
1414 set_optab_libfunc (gt_optab, HFmode, NULL);
1415 set_optab_libfunc (unord_optab, HFmode, NULL);
1416 break;
1418 default:
1419 break;
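/* Because every HFmode arithmetic and comparison optab is cleared above,
   half-precision values are handled via single precision, using only the
   two conversion libfuncs registered here.  Roughly, for the IEEE format,

     __fp16 a, b, c;
     c = a + b;

   behaves like c = __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b)),
   with the _alternative variants used when arm_fp16_format is
   ARM_FP16_FORMAT_ALTERNATIVE.  */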
1422 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1424 const arm_fixed_mode_set fixed_arith_modes[] =
1426 { QQmode, "qq" },
1427 { UQQmode, "uqq" },
1428 { HQmode, "hq" },
1429 { UHQmode, "uhq" },
1430 { SQmode, "sq" },
1431 { USQmode, "usq" },
1432 { DQmode, "dq" },
1433 { UDQmode, "udq" },
1434 { TQmode, "tq" },
1435 { UTQmode, "utq" },
1436 { HAmode, "ha" },
1437 { UHAmode, "uha" },
1438 { SAmode, "sa" },
1439 { USAmode, "usa" },
1440 { DAmode, "da" },
1441 { UDAmode, "uda" },
1442 { TAmode, "ta" },
1443 { UTAmode, "uta" }
1445 const arm_fixed_mode_set fixed_conv_modes[] =
1447 { QQmode, "qq" },
1448 { UQQmode, "uqq" },
1449 { HQmode, "hq" },
1450 { UHQmode, "uhq" },
1451 { SQmode, "sq" },
1452 { USQmode, "usq" },
1453 { DQmode, "dq" },
1454 { UDQmode, "udq" },
1455 { TQmode, "tq" },
1456 { UTQmode, "utq" },
1457 { HAmode, "ha" },
1458 { UHAmode, "uha" },
1459 { SAmode, "sa" },
1460 { USAmode, "usa" },
1461 { DAmode, "da" },
1462 { UDAmode, "uda" },
1463 { TAmode, "ta" },
1464 { UTAmode, "uta" },
1465 { QImode, "qi" },
1466 { HImode, "hi" },
1467 { SImode, "si" },
1468 { DImode, "di" },
1469 { TImode, "ti" },
1470 { SFmode, "sf" },
1471 { DFmode, "df" }
1473 unsigned int i, j;
1475 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1477 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1478 "add", fixed_arith_modes[i].name, 3);
1479 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1480 "ssadd", fixed_arith_modes[i].name, 3);
1481 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1482 "usadd", fixed_arith_modes[i].name, 3);
1483 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1484 "sub", fixed_arith_modes[i].name, 3);
1485 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1486 "sssub", fixed_arith_modes[i].name, 3);
1487 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1488 "ussub", fixed_arith_modes[i].name, 3);
1489 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1490 "mul", fixed_arith_modes[i].name, 3);
1491 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1492 "ssmul", fixed_arith_modes[i].name, 3);
1493 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1494 "usmul", fixed_arith_modes[i].name, 3);
1495 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1496 "div", fixed_arith_modes[i].name, 3);
1497 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1498 "udiv", fixed_arith_modes[i].name, 3);
1499 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1500 "ssdiv", fixed_arith_modes[i].name, 3);
1501 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1502 "usdiv", fixed_arith_modes[i].name, 3);
1503 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1504 "neg", fixed_arith_modes[i].name, 2);
1505 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1506 "ssneg", fixed_arith_modes[i].name, 2);
1507 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1508 "usneg", fixed_arith_modes[i].name, 2);
1509 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1510 "ashl", fixed_arith_modes[i].name, 3);
1511 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1512 "ashr", fixed_arith_modes[i].name, 3);
1513 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1514 "lshr", fixed_arith_modes[i].name, 3);
1515 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1516 "ssashl", fixed_arith_modes[i].name, 3);
1517 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1518 "usashl", fixed_arith_modes[i].name, 3);
1519 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1520 "cmp", fixed_arith_modes[i].name, 2);
1523 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1524 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1526 if (i == j
1527 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1528 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1529 continue;
1531 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1532 fixed_conv_modes[j].mode, "fract",
1533 fixed_conv_modes[i].name,
1534 fixed_conv_modes[j].name);
1535 arm_set_fixed_conv_libfunc (satfract_optab,
1536 fixed_conv_modes[i].mode,
1537 fixed_conv_modes[j].mode, "satfract",
1538 fixed_conv_modes[i].name,
1539 fixed_conv_modes[j].name);
1540 arm_set_fixed_conv_libfunc (fractuns_optab,
1541 fixed_conv_modes[i].mode,
1542 fixed_conv_modes[j].mode, "fractuns",
1543 fixed_conv_modes[i].name,
1544 fixed_conv_modes[j].name);
1545 arm_set_fixed_conv_libfunc (satfractuns_optab,
1546 fixed_conv_modes[i].mode,
1547 fixed_conv_modes[j].mode, "satfractuns",
1548 fixed_conv_modes[i].name,
1549 fixed_conv_modes[j].name);
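/* For illustration (an assumption about the helper routines used above,
   which compose "__gnu_" + operation + mode suffix + operand count): the
   loops just above register libgcc entry points along the lines of

     add_optab   on QQmode             ->  "__gnu_addqq3"
     ssneg_optab on HAmode             ->  "__gnu_ssnegha2"
     fract_optab from QQmode to SImode ->  "__gnu_fractqqsi"

   so that fixed-point arithmetic and conversions expand to library calls
   rather than inline code.  The exact strings are determined by
   arm_set_fixed_optab_libfunc and arm_set_fixed_conv_libfunc.  */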
1553 if (TARGET_AAPCS_BASED)
1554 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1557 /* On AAPCS systems, this is the "struct __va_list". */
1558 static GTY(()) tree va_list_type;
1560 /* Return the type to use as __builtin_va_list. */
1561 static tree
1562 arm_build_builtin_va_list (void)
1564 tree va_list_name;
1565 tree ap_field;
1567 if (!TARGET_AAPCS_BASED)
1568 return std_build_builtin_va_list ();
1570 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1571 defined as:
1573 struct __va_list
1575 void *__ap;
1578 The C Library ABI further reinforces this definition in \S
1579 4.1.
1581 We must follow this definition exactly. The structure tag
1582 name is visible in C++ mangled names, and thus forms a part
1583 of the ABI. The field name may be used by people who
1584 #include <stdarg.h>. */
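/* An illustrative sketch of what this amounts to at the source level
   (the mangled form shown is an assumption based on the ARM C++ ABI):

     struct __va_list { void *__ap; };
     typedef struct __va_list va_list;

   A C++ function such as  void f (va_list)  therefore carries the tag
   "__va_list" in its mangled name (roughly _Z1fSt9__va_list), which is
   why the tag and field names built below must match the ABI exactly.  */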
1585 /* Create the type. */
1586 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1587 /* Give it the required name. */
1588 va_list_name = build_decl (BUILTINS_LOCATION,
1589 TYPE_DECL,
1590 get_identifier ("__va_list"),
1591 va_list_type);
1592 DECL_ARTIFICIAL (va_list_name) = 1;
1593 TYPE_NAME (va_list_type) = va_list_name;
1594 TYPE_STUB_DECL (va_list_type) = va_list_name;
1595 /* Create the __ap field. */
1596 ap_field = build_decl (BUILTINS_LOCATION,
1597 FIELD_DECL,
1598 get_identifier ("__ap"),
1599 ptr_type_node);
1600 DECL_ARTIFICIAL (ap_field) = 1;
1601 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1602 TYPE_FIELDS (va_list_type) = ap_field;
1603 /* Compute its layout. */
1604 layout_type (va_list_type);
1606 return va_list_type;
1609 /* Return an expression of type "void *" pointing to the next
1610 available argument in a variable-argument list. VALIST is the
1611 user-level va_list object, of type __builtin_va_list. */
1612 static tree
1613 arm_extract_valist_ptr (tree valist)
1615 if (TREE_TYPE (valist) == error_mark_node)
1616 return error_mark_node;
1618 /* On an AAPCS target, the pointer is stored within "struct
1619 va_list". */
1620 if (TARGET_AAPCS_BASED)
1622 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1623 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1624 valist, ap_field, NULL_TREE);
1627 return valist;
1630 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1631 static void
1632 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1634 valist = arm_extract_valist_ptr (valist);
1635 std_expand_builtin_va_start (valist, nextarg);
1638 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1639 static tree
1640 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1641 gimple_seq *post_p)
1643 valist = arm_extract_valist_ptr (valist);
1644 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1647 /* Fix up any incompatible options that the user has specified. */
1648 static void
1649 arm_option_override (void)
1651 if (global_options_set.x_arm_arch_option)
1652 arm_selected_arch = &all_architectures[arm_arch_option];
1654 if (global_options_set.x_arm_cpu_option)
1655 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1657 if (global_options_set.x_arm_tune_option)
1658 arm_selected_tune = &all_cores[(int) arm_tune_option];
1660 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1661 SUBTARGET_OVERRIDE_OPTIONS;
1662 #endif
1664 if (arm_selected_arch)
1666 if (arm_selected_cpu)
1668 /* Check for conflict between mcpu and march. */
1669 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1671 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1672 arm_selected_cpu->name, arm_selected_arch->name);
1673 /* -march wins for code generation.
1674 -mcpu wins for default tuning. */
1675 if (!arm_selected_tune)
1676 arm_selected_tune = arm_selected_cpu;
1678 arm_selected_cpu = arm_selected_arch;
1680 else
1681 /* -mcpu wins. */
1682 arm_selected_arch = NULL;
1684 else
1685 /* Pick a CPU based on the architecture. */
1686 arm_selected_cpu = arm_selected_arch;
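/* Illustrative example of the interaction above (hypothetical command
   line):

     gcc -march=armv5te -mcpu=arm7tdmi ...

   warns that the two switches conflict, generates code for ARMv5TE
   (-march wins), and tunes for the ARM7TDMI core (-mcpu wins for tuning)
   unless -mtune is also given.  */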
1689 /* If the user did not specify a processor, choose one for them. */
1690 if (!arm_selected_cpu)
1692 const struct processors * sel;
1693 unsigned int sought;
1695 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1696 if (!arm_selected_cpu->name)
1698 #ifdef SUBTARGET_CPU_DEFAULT
1699 /* Use the subtarget default CPU if none was specified by
1700 configure. */
1701 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1702 #endif
1703 /* Default to ARM6. */
1704 if (!arm_selected_cpu->name)
1705 arm_selected_cpu = &all_cores[arm6];
1708 sel = arm_selected_cpu;
1709 insn_flags = sel->flags;
1711 /* Now check to see if the user has specified some command line
1712 switches that require certain abilities from the CPU. */
1713 sought = 0;
1715 if (TARGET_INTERWORK || TARGET_THUMB)
1717 sought |= (FL_THUMB | FL_MODE32);
1719 /* There are no ARM processors that support both APCS-26 and
1720 interworking. Therefore we force FL_MODE26 to be removed
1721 from insn_flags here (if it was set), so that the search
1722 below will always be able to find a compatible processor. */
1723 insn_flags &= ~FL_MODE26;
1726 if (sought != 0 && ((sought & insn_flags) != sought))
1728 /* Try to locate a CPU type that supports all of the abilities
1729 of the default CPU, plus the extra abilities requested by
1730 the user. */
1731 for (sel = all_cores; sel->name != NULL; sel++)
1732 if ((sel->flags & sought) == (sought | insn_flags))
1733 break;
1735 if (sel->name == NULL)
1737 unsigned current_bit_count = 0;
1738 const struct processors * best_fit = NULL;
1740 /* Ideally we would like to issue an error message here
1741 saying that it was not possible to find a CPU compatible
1742 with the default CPU, but which also supports the command
1743 line options specified by the programmer, and that they
1744 ought to use the -mcpu=<name> command line option to
1745 override the default CPU type.
1747 Since we cannot find a CPU that has both the
1748 characteristics of the default CPU and the given
1749 command line options, we scan the array again looking
1750 for a best match. */
1751 for (sel = all_cores; sel->name != NULL; sel++)
1752 if ((sel->flags & sought) == sought)
1754 unsigned count;
1756 count = bit_count (sel->flags & insn_flags);
1758 if (count >= current_bit_count)
1760 best_fit = sel;
1761 current_bit_count = count;
1765 gcc_assert (best_fit);
1766 sel = best_fit;
1769 arm_selected_cpu = sel;
1773 gcc_assert (arm_selected_cpu);
1774 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1775 if (!arm_selected_tune)
1776 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1778 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1779 insn_flags = arm_selected_cpu->flags;
1780 arm_base_arch = arm_selected_cpu->base_arch;
1782 arm_tune = arm_selected_tune->core;
1783 tune_flags = arm_selected_tune->flags;
1784 current_tune = arm_selected_tune->tune;
1786 /* Make sure that the processor choice does not conflict with any of the
1787 other command line choices. */
1788 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1789 error ("target CPU does not support ARM mode");
1791 /* BPABI targets use linker tricks to allow interworking on cores
1792 without thumb support. */
1793 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1795 warning (0, "target CPU does not support interworking" );
1796 target_flags &= ~MASK_INTERWORK;
1799 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1801 warning (0, "target CPU does not support THUMB instructions");
1802 target_flags &= ~MASK_THUMB;
1805 if (TARGET_APCS_FRAME && TARGET_THUMB)
1807 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1808 target_flags &= ~MASK_APCS_FRAME;
1811 /* Callee super interworking implies thumb interworking. Adding
1812 this to the flags here simplifies the logic elsewhere. */
1813 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1814 target_flags |= MASK_INTERWORK;
1816 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1817 from here where no function is being compiled currently. */
1818 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1819 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1821 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1822 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1824 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1826 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1827 target_flags |= MASK_APCS_FRAME;
1830 if (TARGET_POKE_FUNCTION_NAME)
1831 target_flags |= MASK_APCS_FRAME;
1833 if (TARGET_APCS_REENT && flag_pic)
1834 error ("-fpic and -mapcs-reent are incompatible");
1836 if (TARGET_APCS_REENT)
1837 warning (0, "APCS reentrant code not supported. Ignored");
1839 /* If this target is normally configured to use APCS frames, warn if they
1840 are turned off and debugging is turned on. */
1841 if (TARGET_ARM
1842 && write_symbols != NO_DEBUG
1843 && !TARGET_APCS_FRAME
1844 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1845 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1847 if (TARGET_APCS_FLOAT)
1848 warning (0, "passing floating point arguments in fp regs not yet supported");
1850 if (TARGET_LITTLE_WORDS)
1851 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1852 "will be removed in a future release");
1854 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1855 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1856 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1857 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1858 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1859 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1860 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1861 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1862 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1863 arm_arch6m = arm_arch6 && !arm_arch_notm;
1864 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1865 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1866 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
1867 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1868 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1870 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1871 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1872 thumb_code = TARGET_ARM == 0;
1873 thumb1_code = TARGET_THUMB1 != 0;
1874 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1875 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1876 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1877 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1878 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1879 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1880 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1881 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
1882 if (arm_restrict_it == 2)
1883 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
1885 if (!TARGET_THUMB2)
1886 arm_restrict_it = 0;
1888 /* If we are not using the default (ARM mode) section anchor offset
1889 ranges, then set the correct ranges now. */
1890 if (TARGET_THUMB1)
1892 /* Thumb-1 LDR instructions cannot have negative offsets.
1893 Permissible positive offset ranges are 5-bit (for byte loads),
1894 6-bit (for halfword loads), or 7-bit (for word loads).
1895 Empirical results suggest a 7-bit anchor range gives the best
1896 overall code size. */
1897 targetm.min_anchor_offset = 0;
1898 targetm.max_anchor_offset = 127;
1900 else if (TARGET_THUMB2)
1902 /* The minimum is set such that the total size of the block
1903 for a particular anchor is 248 + 1 + 4095 bytes, which is
1904 divisible by eight, ensuring natural spacing of anchors. */
1905 targetm.min_anchor_offset = -248;
1906 targetm.max_anchor_offset = 4095;
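/* Worked arithmetic for the Thumb-2 case above: the block spans offsets
   -248 through +4095, i.e. 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543,
   which is the divisibility by eight the comment refers to.  */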
1909 /* V5 code we generate is completely interworking capable, so we turn off
1910 TARGET_INTERWORK here to avoid many tests later on. */
1912 /* XXX However, we must pass the right pre-processor defines to CPP
1913 or GLD can get confused. This is a hack. */
1914 if (TARGET_INTERWORK)
1915 arm_cpp_interwork = 1;
1917 if (arm_arch5)
1918 target_flags &= ~MASK_INTERWORK;
1920 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1921 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1923 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1924 error ("iwmmxt abi requires an iwmmxt capable cpu");
1926 if (!global_options_set.x_arm_fpu_index)
1928 const char *target_fpu_name;
1929 bool ok;
1931 #ifdef FPUTYPE_DEFAULT
1932 target_fpu_name = FPUTYPE_DEFAULT;
1933 #else
1934 target_fpu_name = "vfp";
1935 #endif
1937 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1938 CL_TARGET);
1939 gcc_assert (ok);
1942 arm_fpu_desc = &all_fpus[arm_fpu_index];
1944 switch (arm_fpu_desc->model)
1946 case ARM_FP_MODEL_VFP:
1947 arm_fpu_attr = FPU_VFP;
1948 break;
1950 default:
1951 gcc_unreachable();
1954 if (TARGET_AAPCS_BASED)
1956 if (TARGET_CALLER_INTERWORKING)
1957 error ("AAPCS does not support -mcaller-super-interworking");
1958 else
1959 if (TARGET_CALLEE_INTERWORKING)
1960 error ("AAPCS does not support -mcallee-super-interworking");
1963 /* iWMMXt and NEON are incompatible. */
1964 if (TARGET_IWMMXT && TARGET_NEON)
1965 error ("iWMMXt and NEON are incompatible");
1967 /* iWMMXt unsupported under Thumb mode. */
1968 if (TARGET_THUMB && TARGET_IWMMXT)
1969 error ("iWMMXt unsupported under Thumb mode");
1971 /* __fp16 support currently assumes the core has ldrh. */
1972 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1973 sorry ("__fp16 and no ldrh");
1975 /* If soft-float is specified then don't use FPU. */
1976 if (TARGET_SOFT_FLOAT)
1977 arm_fpu_attr = FPU_NONE;
1979 if (TARGET_AAPCS_BASED)
1981 if (arm_abi == ARM_ABI_IWMMXT)
1982 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1983 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1984 && TARGET_HARD_FLOAT
1985 && TARGET_VFP)
1986 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1987 else
1988 arm_pcs_default = ARM_PCS_AAPCS;
1990 else
1992 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1993 sorry ("-mfloat-abi=hard and VFP");
1995 if (arm_abi == ARM_ABI_APCS)
1996 arm_pcs_default = ARM_PCS_APCS;
1997 else
1998 arm_pcs_default = ARM_PCS_ATPCS;
2001 /* For arm2/3 there is no need to do any scheduling if we are doing
2002 software floating-point. */
2003 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2004 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2006 /* Use the cp15 method if it is available. */
2007 if (target_thread_pointer == TP_AUTO)
2009 if (arm_arch6k && !TARGET_THUMB1)
2010 target_thread_pointer = TP_CP15;
2011 else
2012 target_thread_pointer = TP_SOFT;
2015 if (TARGET_HARD_TP && TARGET_THUMB1)
2016 error ("can not use -mtp=cp15 with 16-bit Thumb");
2018 /* Override the default structure alignment for AAPCS ABI. */
2019 if (!global_options_set.x_arm_structure_size_boundary)
2021 if (TARGET_AAPCS_BASED)
2022 arm_structure_size_boundary = 8;
2024 else
2026 if (arm_structure_size_boundary != 8
2027 && arm_structure_size_boundary != 32
2028 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2030 if (ARM_DOUBLEWORD_ALIGN)
2031 warning (0,
2032 "structure size boundary can only be set to 8, 32 or 64");
2033 else
2034 warning (0, "structure size boundary can only be set to 8 or 32");
2035 arm_structure_size_boundary
2036 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2040 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2042 error ("RTP PIC is incompatible with Thumb");
2043 flag_pic = 0;
2046 /* If stack checking is disabled, we can use r10 as the PIC register,
2047 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2048 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2050 if (TARGET_VXWORKS_RTP)
2051 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2052 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2055 if (flag_pic && TARGET_VXWORKS_RTP)
2056 arm_pic_register = 9;
2058 if (arm_pic_register_string != NULL)
2060 int pic_register = decode_reg_name (arm_pic_register_string);
2062 if (!flag_pic)
2063 warning (0, "-mpic-register= is useless without -fpic");
2065 /* Prevent the user from choosing an obviously stupid PIC register. */
2066 else if (pic_register < 0 || call_used_regs[pic_register]
2067 || pic_register == HARD_FRAME_POINTER_REGNUM
2068 || pic_register == STACK_POINTER_REGNUM
2069 || pic_register >= PC_REGNUM
2070 || (TARGET_VXWORKS_RTP
2071 && (unsigned int) pic_register != arm_pic_register))
2072 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2073 else
2074 arm_pic_register = pic_register;
2077 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2078 if (fix_cm3_ldrd == 2)
2080 if (arm_selected_cpu->core == cortexm3)
2081 fix_cm3_ldrd = 1;
2082 else
2083 fix_cm3_ldrd = 0;
2086 /* Enable -munaligned-access by default for
2087 - all ARMv6 architecture-based processors
2088 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2089 - ARMv8 architecture-based processors.
2091 Disable -munaligned-access by default for
2092 - all pre-ARMv6 architecture-based processors
2093 - ARMv6-M architecture-based processors. */
2095 if (unaligned_access == 2)
2097 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2098 unaligned_access = 1;
2099 else
2100 unaligned_access = 0;
2102 else if (unaligned_access == 1
2103 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2105 warning (0, "target CPU does not support unaligned accesses");
2106 unaligned_access = 0;
2109 if (TARGET_THUMB1 && flag_schedule_insns)
2111 /* Don't warn since it's on by default in -O2. */
2112 flag_schedule_insns = 0;
2115 if (optimize_size)
2117 /* If optimizing for size, bump the number of instructions that we
2118 are prepared to conditionally execute (even on a StrongARM). */
2119 max_insns_skipped = 6;
2121 else
2122 max_insns_skipped = current_tune->max_insns_skipped;
2124 /* Hot/Cold partitioning is not currently supported, since we can't
2125 handle literal pool placement in that case. */
2126 if (flag_reorder_blocks_and_partition)
2128 inform (input_location,
2129 "-freorder-blocks-and-partition not supported on this architecture");
2130 flag_reorder_blocks_and_partition = 0;
2131 flag_reorder_blocks = 1;
2134 if (flag_pic)
2135 /* Hoisting PIC address calculations more aggressively provides a small,
2136 but measurable, size reduction for PIC code. Therefore, we decrease
2137 the bar for unrestricted expression hoisting to the cost of PIC address
2138 calculation, which is 2 instructions. */
2139 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2140 global_options.x_param_values,
2141 global_options_set.x_param_values);
2143 /* ARM EABI defaults to strict volatile bitfields. */
2144 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2145 && abi_version_at_least(2))
2146 flag_strict_volatile_bitfields = 1;
2148 /* Enable software prefetching at -O3 for CPUs that have prefetch, where we have
2149 deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2150 if (flag_prefetch_loop_arrays < 0
2151 && HAVE_prefetch
2152 && optimize >= 3
2153 && current_tune->num_prefetch_slots > 0)
2154 flag_prefetch_loop_arrays = 1;
2156 /* Set up parameters to be used in prefetching algorithm. Do not override the
2157 defaults unless we are tuning for a core we have researched values for. */
2158 if (current_tune->num_prefetch_slots > 0)
2159 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2160 current_tune->num_prefetch_slots,
2161 global_options.x_param_values,
2162 global_options_set.x_param_values);
2163 if (current_tune->l1_cache_line_size >= 0)
2164 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2165 current_tune->l1_cache_line_size,
2166 global_options.x_param_values,
2167 global_options_set.x_param_values);
2168 if (current_tune->l1_cache_size >= 0)
2169 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2170 current_tune->l1_cache_size,
2171 global_options.x_param_values,
2172 global_options_set.x_param_values);
2174 /* Use Neon rather than core registers to perform 64-bit
2175 operations. */
2176 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2177 if (use_neon_for_64bits == 1)
2178 prefer_neon_for_64bits = true;
2180 /* Use the alternative scheduling-pressure algorithm by default. */
2181 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2182 global_options.x_param_values,
2183 global_options_set.x_param_values);
2185 /* Disable shrink-wrap when optimizing function for size, since it tends to
2186 generate additional returns. */
2187 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2188 flag_shrink_wrap = false;
2189 /* TBD: Dwarf info for apcs frame is not handled yet. */
2190 if (TARGET_APCS_FRAME)
2191 flag_shrink_wrap = false;
2193 /* Register global variables with the garbage collector. */
2194 arm_add_gc_roots ();
2197 static void
2198 arm_add_gc_roots (void)
2200 gcc_obstack_init(&minipool_obstack);
2201 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2204 /* A table of known ARM exception types.
2205 For use with the interrupt function attribute. */
2207 typedef struct
2209 const char *const arg;
2210 const unsigned long return_value;
2212 isr_attribute_arg;
2214 static const isr_attribute_arg isr_attribute_args [] =
2216 { "IRQ", ARM_FT_ISR },
2217 { "irq", ARM_FT_ISR },
2218 { "FIQ", ARM_FT_FIQ },
2219 { "fiq", ARM_FT_FIQ },
2220 { "ABORT", ARM_FT_ISR },
2221 { "abort", ARM_FT_ISR },
2222 { "ABORT", ARM_FT_ISR },
2223 { "abort", ARM_FT_ISR },
2224 { "UNDEF", ARM_FT_EXCEPTION },
2225 { "undef", ARM_FT_EXCEPTION },
2226 { "SWI", ARM_FT_EXCEPTION },
2227 { "swi", ARM_FT_EXCEPTION },
2228 { NULL, ARM_FT_NORMAL }
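/* Illustrative usage of the table above from user code ("interrupt" is
   accepted as an alias for "isr"; the handler names are hypothetical):

     void __attribute__ ((isr ("IRQ"))) irq_handler (void);
     void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);

   The string argument is matched against the entries above; omitting it
   defaults to an IRQ handler (see arm_isr_value below).  */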
2231 /* Returns the (interrupt) function type of the current
2232 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2234 static unsigned long
2235 arm_isr_value (tree argument)
2237 const isr_attribute_arg * ptr;
2238 const char * arg;
2240 if (!arm_arch_notm)
2241 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2243 /* No argument - default to IRQ. */
2244 if (argument == NULL_TREE)
2245 return ARM_FT_ISR;
2247 /* Get the value of the argument. */
2248 if (TREE_VALUE (argument) == NULL_TREE
2249 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2250 return ARM_FT_UNKNOWN;
2252 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2254 /* Check it against the list of known arguments. */
2255 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2256 if (streq (arg, ptr->arg))
2257 return ptr->return_value;
2259 /* An unrecognized interrupt type. */
2260 return ARM_FT_UNKNOWN;
2263 /* Computes the type of the current function. */
2265 static unsigned long
2266 arm_compute_func_type (void)
2268 unsigned long type = ARM_FT_UNKNOWN;
2269 tree a;
2270 tree attr;
2272 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2274 /* Decide if the current function is volatile. Such functions
2275 never return, and many memory cycles can be saved by not storing
2276 register values that will never be needed again. This optimization
2277 was added to speed up context switching in a kernel application. */
2278 if (optimize > 0
2279 && (TREE_NOTHROW (current_function_decl)
2280 || !(flag_unwind_tables
2281 || (flag_exceptions
2282 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2283 && TREE_THIS_VOLATILE (current_function_decl))
2284 type |= ARM_FT_VOLATILE;
2286 if (cfun->static_chain_decl != NULL)
2287 type |= ARM_FT_NESTED;
2289 attr = DECL_ATTRIBUTES (current_function_decl);
2291 a = lookup_attribute ("naked", attr);
2292 if (a != NULL_TREE)
2293 type |= ARM_FT_NAKED;
2295 a = lookup_attribute ("isr", attr);
2296 if (a == NULL_TREE)
2297 a = lookup_attribute ("interrupt", attr);
2299 if (a == NULL_TREE)
2300 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2301 else
2302 type |= arm_isr_value (TREE_VALUE (a));
2304 return type;
2307 /* Returns the type of the current function. */
2309 unsigned long
2310 arm_current_func_type (void)
2312 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2313 cfun->machine->func_type = arm_compute_func_type ();
2315 return cfun->machine->func_type;
2318 bool
2319 arm_allocate_stack_slots_for_args (void)
2321 /* Naked functions should not allocate stack slots for arguments. */
2322 return !IS_NAKED (arm_current_func_type ());
2325 static bool
2326 arm_warn_func_return (tree decl)
2328 /* Naked functions are implemented entirely in assembly, including the
2329 return sequence, so suppress warnings about this. */
2330 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2334 /* Output assembler code for a block containing the constant parts
2335 of a trampoline, leaving space for the variable parts.
2337 On the ARM, (if r8 is the static chain regnum, and remembering that
2338 referencing pc adds an offset of 8) the trampoline looks like:
2339 ldr r8, [pc, #0]
2340 ldr pc, [pc]
2341 .word static chain value
2342 .word function's address
2343 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
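/* Illustrative layout (a sketch of the 32-bit ARM case): once
   arm_trampoline_init below has filled in the two variable words, the
   trampoline in memory reads

     0:  ldr  r8, [pc, #0]    @ pc reads as 8, so this loads offset 8
     4:  ldr  pc, [pc]        @ pc reads as 12, so this loads offset 12
     8:  .word <static chain value>
    12:  .word <function address>

   where r8 stands in for STATIC_CHAIN_REGNUM.  */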
2345 static void
2346 arm_asm_trampoline_template (FILE *f)
2348 if (TARGET_ARM)
2350 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2351 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2353 else if (TARGET_THUMB2)
2355 /* The Thumb-2 trampoline is similar to the ARM implementation.
2356 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2357 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2358 STATIC_CHAIN_REGNUM, PC_REGNUM);
2359 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2361 else
2363 ASM_OUTPUT_ALIGN (f, 2);
2364 fprintf (f, "\t.code\t16\n");
2365 fprintf (f, ".Ltrampoline_start:\n");
2366 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2367 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2368 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2369 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2370 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2371 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2373 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2374 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2377 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2379 static void
2380 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2382 rtx fnaddr, mem, a_tramp;
2384 emit_block_move (m_tramp, assemble_trampoline_template (),
2385 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2387 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2388 emit_move_insn (mem, chain_value);
2390 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2391 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2392 emit_move_insn (mem, fnaddr);
2394 a_tramp = XEXP (m_tramp, 0);
2395 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2396 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2397 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2400 /* Thumb trampolines should be entered in thumb mode, so set
2401 the bottom bit of the address. */
2403 static rtx
2404 arm_trampoline_adjust_address (rtx addr)
2406 if (TARGET_THUMB)
2407 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2408 NULL, 0, OPTAB_LIB_WIDEN);
2409 return addr;
2412 /* Return 1 if it is possible to return using a single instruction.
2413 If SIBLING is non-null, this is a test for a return before a sibling
2414 call. SIBLING is the call insn, so we can examine its register usage. */
2417 use_return_insn (int iscond, rtx sibling)
2419 int regno;
2420 unsigned int func_type;
2421 unsigned long saved_int_regs;
2422 unsigned HOST_WIDE_INT stack_adjust;
2423 arm_stack_offsets *offsets;
2425 /* Never use a return instruction before reload has run. */
2426 if (!reload_completed)
2427 return 0;
2429 func_type = arm_current_func_type ();
2431 /* Naked, volatile and stack alignment functions need special
2432 consideration. */
2433 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2434 return 0;
2436 /* So do interrupt functions that use the frame pointer and Thumb
2437 interrupt functions. */
2438 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2439 return 0;
2441 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
2442 && !optimize_function_for_size_p (cfun))
2443 return 0;
2445 offsets = arm_get_frame_offsets ();
2446 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2448 /* As do variadic functions. */
2449 if (crtl->args.pretend_args_size
2450 || cfun->machine->uses_anonymous_args
2451 /* Or if the function calls __builtin_eh_return () */
2452 || crtl->calls_eh_return
2453 /* Or if the function calls alloca */
2454 || cfun->calls_alloca
2455 /* Or if there is a stack adjustment. However, if the stack pointer
2456 is saved on the stack, we can use a pre-incrementing stack load. */
2457 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2458 && stack_adjust == 4)))
2459 return 0;
2461 saved_int_regs = offsets->saved_regs_mask;
2463 /* Unfortunately, the insn
2465 ldmib sp, {..., sp, ...}
2467 triggers a bug on most SA-110 based devices, such that the stack
2468 pointer won't be correctly restored if the instruction takes a
2469 page fault. We work around this problem by popping r3 along with
2470 the other registers, since that is never slower than executing
2471 another instruction.
2473 We test for !arm_arch5 here, because code for any architecture
2474 less than this could potentially be run on one of the buggy
2475 chips. */
2476 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2478 /* Validate that r3 is a call-clobbered register (always true in
2479 the default abi) ... */
2480 if (!call_used_regs[3])
2481 return 0;
2483 /* ... that it isn't being used for a return value ... */
2484 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2485 return 0;
2487 /* ... or for a tail-call argument ... */
2488 if (sibling)
2490 gcc_assert (CALL_P (sibling));
2492 if (find_regno_fusage (sibling, USE, 3))
2493 return 0;
2496 /* ... and that there are no call-saved registers in r0-r2
2497 (always true in the default ABI). */
2498 if (saved_int_regs & 0x7)
2499 return 0;
2502 /* Can't be done if interworking with Thumb, and any registers have been
2503 stacked. */
2504 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2505 return 0;
2507 /* On StrongARM, conditional returns are expensive if they aren't
2508 taken and multiple registers have been stacked. */
2509 if (iscond && arm_tune_strongarm)
2511 /* Conditional return when just the LR is stored is a simple
2512 conditional-load instruction, that's not expensive. */
2513 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2514 return 0;
2516 if (flag_pic
2517 && arm_pic_register != INVALID_REGNUM
2518 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2519 return 0;
2522 /* If there are saved registers but the LR isn't saved, then we need
2523 two instructions for the return. */
2524 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2525 return 0;
2527 /* Can't be done if any of the VFP regs are pushed,
2528 since this also requires an insn. */
2529 if (TARGET_HARD_FLOAT && TARGET_VFP)
2530 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2531 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2532 return 0;
2534 if (TARGET_REALLY_IWMMXT)
2535 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2536 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2537 return 0;
2539 return 1;
2542 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
2543 shrink-wrapping if possible. This is the case if we need to emit a
2544 prologue, which we can test by looking at the offsets. */
2545 bool
2546 use_simple_return_p (void)
2548 arm_stack_offsets *offsets;
2550 offsets = arm_get_frame_offsets ();
2551 return offsets->outgoing_args != 0;
2554 /* Return TRUE if int I is a valid immediate ARM constant. */
2557 const_ok_for_arm (HOST_WIDE_INT i)
2559 int lowbit;
2561 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2562 be all zero, or all one. */
2563 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2564 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2565 != ((~(unsigned HOST_WIDE_INT) 0)
2566 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2567 return FALSE;
2569 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2571 /* Fast return for 0 and small values. We must do this for zero, since
2572 the code below can't handle that one case. */
2573 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2574 return TRUE;
2576 /* Get the number of trailing zeros. */
2577 lowbit = ffs((int) i) - 1;
2579 /* Only even shifts are allowed in ARM mode so round down to the
2580 nearest even number. */
2581 if (TARGET_ARM)
2582 lowbit &= ~1;
2584 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2585 return TRUE;
2587 if (TARGET_ARM)
2589 /* Allow rotated constants in ARM mode. */
2590 if (lowbit <= 4
2591 && ((i & ~0xc000003f) == 0
2592 || (i & ~0xf000000f) == 0
2593 || (i & ~0xfc000003) == 0))
2594 return TRUE;
2596 else
2598 HOST_WIDE_INT v;
2600 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2601 v = i & 0xff;
2602 v |= v << 16;
2603 if (i == v || i == (v | (v << 8)))
2604 return TRUE;
2606 /* Allow repeated pattern 0xXY00XY00. */
2607 v = i & 0xff00;
2608 v |= v << 16;
2609 if (i == v)
2610 return TRUE;
2613 return FALSE;
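/* Worked examples for the checks above:

     0x000000ff  valid everywhere (plain 8-bit value)
     0x0000ff00  valid everywhere (0xff shifted/rotated by 8)
     0x000001fe  valid for Thumb-2 (0xff << 1), but not for ARM, where
                 only even rotations are allowed
     0x00ff00ff  not a rotated 8-bit value, but accepted for Thumb-2 as
                 the replicated pattern 0x00XY00XY
     0x12345678  invalid for both, so it must be synthesized from several
                 instructions or loaded from the literal pool.  */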
2616 /* Return true if I is a valid constant for the operation CODE. */
2618 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2620 if (const_ok_for_arm (i))
2621 return 1;
2623 switch (code)
2625 case SET:
2626 /* See if we can use movw. */
2627 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2628 return 1;
2629 else
2630 /* Otherwise, try mvn. */
2631 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2633 case PLUS:
2634 /* See if we can use addw or subw. */
2635 if (TARGET_THUMB2
2636 && ((i & 0xfffff000) == 0
2637 || ((-i) & 0xfffff000) == 0))
2638 return 1;
2639 /* else fall through. */
2641 case COMPARE:
2642 case EQ:
2643 case NE:
2644 case GT:
2645 case LE:
2646 case LT:
2647 case GE:
2648 case GEU:
2649 case LTU:
2650 case GTU:
2651 case LEU:
2652 case UNORDERED:
2653 case ORDERED:
2654 case UNEQ:
2655 case UNGE:
2656 case UNLT:
2657 case UNGT:
2658 case UNLE:
2659 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2661 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2662 case XOR:
2663 return 0;
2665 case IOR:
2666 if (TARGET_THUMB2)
2667 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2668 return 0;
2670 case AND:
2671 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2673 default:
2674 gcc_unreachable ();
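/* Illustrative consequence of the rules above: for PLUS, a constant that
   cannot be encoded directly may still be handled via its negation, e.g.

     add r0, r1, #0xffffff00   @ 0xffffff00 is not a valid immediate
     sub r0, r1, #0x100        @ ...but its negation is, so SUB is used

   and similarly AND with an un-encodable mask can become BIC with the
   inverted mask.  */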
2678 /* Return true if I is a valid di mode constant for the operation CODE. */
2680 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2682 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2683 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2684 rtx hi = GEN_INT (hi_val);
2685 rtx lo = GEN_INT (lo_val);
2687 if (TARGET_THUMB1)
2688 return 0;
2690 switch (code)
2692 case AND:
2693 case IOR:
2694 case XOR:
2695 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
2696 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
2697 case PLUS:
2698 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2700 default:
2701 return 0;
2705 /* Emit a sequence of insns to handle a large constant.
2706 CODE is the code of the operation required; it can be any of SET, PLUS,
2707 IOR, AND, XOR, MINUS;
2708 MODE is the mode in which the operation is being performed;
2709 VAL is the integer to operate on;
2710 SOURCE is the other operand (a register, or a null-pointer for SET);
2711 SUBTARGETS means it is safe to create scratch registers if that will
2712 either produce a simpler sequence, or we will want to cse the values.
2713 Return value is the number of insns emitted. */
2715 /* ??? Tweak this for thumb2. */
2717 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2718 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2720 rtx cond;
2722 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2723 cond = COND_EXEC_TEST (PATTERN (insn));
2724 else
2725 cond = NULL_RTX;
2727 if (subtargets || code == SET
2728 || (REG_P (target) && REG_P (source)
2729 && REGNO (target) != REGNO (source)))
2731 /* After arm_reorg has been called, we can't fix up expensive
2732 constants by pushing them into memory so we must synthesize
2733 them in-line, regardless of the cost. This is only likely to
2734 be more costly on chips that have load delay slots and we are
2735 compiling without running the scheduler (so no splitting
2736 occurred before the final instruction emission).
2738 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2740 if (!after_arm_reorg
2741 && !cond
2742 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2743 1, 0)
2744 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2745 + (code != SET))))
2747 if (code == SET)
2749 /* Currently SET is the only monadic value for CODE, all
2750 the rest are dyadic. */
2751 if (TARGET_USE_MOVT)
2752 arm_emit_movpair (target, GEN_INT (val));
2753 else
2754 emit_set_insn (target, GEN_INT (val));
2756 return 1;
2758 else
2760 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2762 if (TARGET_USE_MOVT)
2763 arm_emit_movpair (temp, GEN_INT (val));
2764 else
2765 emit_set_insn (temp, GEN_INT (val));
2767 /* For MINUS, the value is subtracted from, since we never
2768 have subtraction of a constant. */
2769 if (code == MINUS)
2770 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2771 else
2772 emit_set_insn (target,
2773 gen_rtx_fmt_ee (code, mode, source, temp));
2774 return 2;
2779 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2783 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2784 ARM/THUMB2 immediates and add up to VAL.
2785 The function's return value gives the number of insns required. */
2786 static int
2787 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2788 struct four_ints *return_sequence)
2790 int best_consecutive_zeros = 0;
2791 int i;
2792 int best_start = 0;
2793 int insns1, insns2;
2794 struct four_ints tmp_sequence;
2796 /* If we aren't targeting ARM, the best place to start is always at
2797 the bottom, otherwise look more closely. */
2798 if (TARGET_ARM)
2800 for (i = 0; i < 32; i += 2)
2802 int consecutive_zeros = 0;
2804 if (!(val & (3 << i)))
2806 while ((i < 32) && !(val & (3 << i)))
2808 consecutive_zeros += 2;
2809 i += 2;
2811 if (consecutive_zeros > best_consecutive_zeros)
2813 best_consecutive_zeros = consecutive_zeros;
2814 best_start = i - consecutive_zeros;
2816 i -= 2;
2821 /* So long as it won't require any more insns to do so, it's
2822 desirable to emit a small constant (in bits 0...9) in the last
2823 insn. This way there is more chance that it can be combined with
2824 a later addressing insn to form a pre-indexed load or store
2825 operation. Consider:
2827 *((volatile int *)0xe0000100) = 1;
2828 *((volatile int *)0xe0000110) = 2;
2830 We want this to wind up as:
2832 mov rA, #0xe0000000
2833 mov rB, #1
2834 str rB, [rA, #0x100]
2835 mov rB, #2
2836 str rB, [rA, #0x110]
2838 rather than having to synthesize both large constants from scratch.
2840 Therefore, we calculate how many insns would be required to emit
2841 the constant starting from `best_start', and also starting from
2842 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2843 yield a shorter sequence, we may as well use zero. */
2844 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2845 if (best_start != 0
2846 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2848 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2849 if (insns2 <= insns1)
2851 *return_sequence = tmp_sequence;
2852 insns1 = insns2;
2856 return insns1;
2859 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2860 static int
2861 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2862 struct four_ints *return_sequence, int i)
2864 int remainder = val & 0xffffffff;
2865 int insns = 0;
2867 /* Try and find a way of doing the job in either two or three
2868 instructions.
2870 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2871 location. We start at position I. This may be the MSB, or
2872 optimal_immediate_sequence may have positioned it at the largest block
2873 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2874 wrapping around to the top of the word when we drop off the bottom.
2875 In the worst case this code should produce no more than four insns.
2877 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2878 constants, shifted to any arbitrary location. We should always start
2879 at the MSB. */
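/* A small worked example of the decomposition performed below: in ARM mode
   the constant 0x12340000 is not itself a valid immediate, but it splits
   into two 8-bit rotated immediates, e.g. 0x12000000 and 0x00340000, so a
   SET can be emitted as

     mov rd, #0x12000000
     add rd, rd, #0x00340000   @ or an equivalent orr

   in which case return_sequence holds those two values and the function
   returns 2.  */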
2882 int end;
2883 unsigned int b1, b2, b3, b4;
2884 unsigned HOST_WIDE_INT result;
2885 int loc;
2887 gcc_assert (insns < 4);
2889 if (i <= 0)
2890 i += 32;
2892 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2893 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2895 loc = i;
2896 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2897 /* We can use addw/subw for the last 12 bits. */
2898 result = remainder;
2899 else
2901 /* Use an 8-bit shifted/rotated immediate. */
2902 end = i - 8;
2903 if (end < 0)
2904 end += 32;
2905 result = remainder & ((0x0ff << end)
2906 | ((i < end) ? (0xff >> (32 - end))
2907 : 0));
2908 i -= 8;
2911 else
2913 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2914 arbitrary shifts. */
2915 i -= TARGET_ARM ? 2 : 1;
2916 continue;
2919 /* Next, see if we can do a better job with a thumb2 replicated
2920 constant.
2922 We do it this way around to catch the cases like 0x01F001E0 where
2923 two 8-bit immediates would work, but a replicated constant would
2924 make it worse.
2926 TODO: 16-bit constants that don't clear all the bits, but still win.
2927 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2928 if (TARGET_THUMB2)
2930 b1 = (remainder & 0xff000000) >> 24;
2931 b2 = (remainder & 0x00ff0000) >> 16;
2932 b3 = (remainder & 0x0000ff00) >> 8;
2933 b4 = remainder & 0xff;
2935 if (loc > 24)
2937 /* The 8-bit immediate already found clears b1 (and maybe b2),
2938 but must leave b3 and b4 alone. */
2940 /* First try to find a 32-bit replicated constant that clears
2941 almost everything. We can assume that we can't do it in one,
2942 or else we wouldn't be here. */
2943 unsigned int tmp = b1 & b2 & b3 & b4;
2944 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2945 + (tmp << 24);
2946 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2947 + (tmp == b3) + (tmp == b4);
2948 if (tmp
2949 && (matching_bytes >= 3
2950 || (matching_bytes == 2
2951 && const_ok_for_op (remainder & ~tmp2, code))))
2953 /* At least 3 of the bytes match, and the fourth has at
2954 least as many bits set, or two of the bytes match
2955 and it will only require one more insn to finish. */
2956 result = tmp2;
2957 i = tmp != b1 ? 32
2958 : tmp != b2 ? 24
2959 : tmp != b3 ? 16
2960 : 8;
2963 /* Second, try to find a 16-bit replicated constant that can
2964 leave three of the bytes clear. If b2 or b4 is already
2965 zero, then we can. If the 8-bit from above would not
2966 clear b2 anyway, then we still win. */
2967 else if (b1 == b3 && (!b2 || !b4
2968 || (remainder & 0x00ff0000 & ~result)))
2970 result = remainder & 0xff00ff00;
2971 i = 24;
2974 else if (loc > 16)
2976 /* The 8-bit immediate already found clears b2 (and maybe b3)
2977 and we don't get here unless b1 is already clear, but it will
2978 leave b4 unchanged. */
2980 /* If we can clear b2 and b4 at once, then we win, since the
2981 8-bits couldn't possibly reach that far. */
2982 if (b2 == b4)
2984 result = remainder & 0x00ff00ff;
2985 i = 16;
2990 return_sequence->i[insns++] = result;
2991 remainder &= ~result;
2993 if (code == SET || code == MINUS)
2994 code = PLUS;
2996 while (remainder);
2998 return insns;
3001 /* Emit an instruction with the indicated PATTERN. If COND is
3002 non-NULL, conditionalize the execution of the instruction on COND
3003 being true. */
3005 static void
3006 emit_constant_insn (rtx cond, rtx pattern)
3008 if (cond)
3009 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3010 emit_insn (pattern);
3013 /* As above, but extra parameter GENERATE which, if clear, suppresses
3014 RTL generation. */
3016 static int
3017 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3018 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3019 int generate)
3021 int can_invert = 0;
3022 int can_negate = 0;
3023 int final_invert = 0;
3024 int i;
3025 int set_sign_bit_copies = 0;
3026 int clear_sign_bit_copies = 0;
3027 int clear_zero_bit_copies = 0;
3028 int set_zero_bit_copies = 0;
3029 int insns = 0, neg_insns, inv_insns;
3030 unsigned HOST_WIDE_INT temp1, temp2;
3031 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3032 struct four_ints *immediates;
3033 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3035 /* Find out which operations are safe for a given CODE. Also do a quick
3036 check for degenerate cases; these can occur when DImode operations
3037 are split. */
3038 switch (code)
3040 case SET:
3041 can_invert = 1;
3042 break;
3044 case PLUS:
3045 can_negate = 1;
3046 break;
3048 case IOR:
3049 if (remainder == 0xffffffff)
3051 if (generate)
3052 emit_constant_insn (cond,
3053 gen_rtx_SET (VOIDmode, target,
3054 GEN_INT (ARM_SIGN_EXTEND (val))));
3055 return 1;
3058 if (remainder == 0)
3060 if (reload_completed && rtx_equal_p (target, source))
3061 return 0;
3063 if (generate)
3064 emit_constant_insn (cond,
3065 gen_rtx_SET (VOIDmode, target, source));
3066 return 1;
3068 break;
3070 case AND:
3071 if (remainder == 0)
3073 if (generate)
3074 emit_constant_insn (cond,
3075 gen_rtx_SET (VOIDmode, target, const0_rtx));
3076 return 1;
3078 if (remainder == 0xffffffff)
3080 if (reload_completed && rtx_equal_p (target, source))
3081 return 0;
3082 if (generate)
3083 emit_constant_insn (cond,
3084 gen_rtx_SET (VOIDmode, target, source));
3085 return 1;
3087 can_invert = 1;
3088 break;
3090 case XOR:
3091 if (remainder == 0)
3093 if (reload_completed && rtx_equal_p (target, source))
3094 return 0;
3095 if (generate)
3096 emit_constant_insn (cond,
3097 gen_rtx_SET (VOIDmode, target, source));
3098 return 1;
3101 if (remainder == 0xffffffff)
3103 if (generate)
3104 emit_constant_insn (cond,
3105 gen_rtx_SET (VOIDmode, target,
3106 gen_rtx_NOT (mode, source)));
3107 return 1;
3109 final_invert = 1;
3110 break;
3112 case MINUS:
3113 /* We treat MINUS as (val - source), since (source - val) is always
3114 passed as (source + (-val)). */
3115 if (remainder == 0)
3117 if (generate)
3118 emit_constant_insn (cond,
3119 gen_rtx_SET (VOIDmode, target,
3120 gen_rtx_NEG (mode, source)));
3121 return 1;
3123 if (const_ok_for_arm (val))
3125 if (generate)
3126 emit_constant_insn (cond,
3127 gen_rtx_SET (VOIDmode, target,
3128 gen_rtx_MINUS (mode, GEN_INT (val),
3129 source)));
3130 return 1;
3133 break;
3135 default:
3136 gcc_unreachable ();
3139 /* If we can do it in one insn get out quickly. */
3140 if (const_ok_for_op (val, code))
3142 if (generate)
3143 emit_constant_insn (cond,
3144 gen_rtx_SET (VOIDmode, target,
3145 (source
3146 ? gen_rtx_fmt_ee (code, mode, source,
3147 GEN_INT (val))
3148 : GEN_INT (val))));
3149 return 1;
3152 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3153 insn. */
3154 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3155 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3157 if (generate)
3159 if (mode == SImode && i == 16)
3160 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3161 smaller insn. */
3162 emit_constant_insn (cond,
3163 gen_zero_extendhisi2
3164 (target, gen_lowpart (HImode, source)));
3165 else
3166 /* Extz only supports SImode, but we can coerce the operands
3167 into that mode. */
3168 emit_constant_insn (cond,
3169 gen_extzv_t2 (gen_lowpart (SImode, target),
3170 gen_lowpart (SImode, source),
3171 GEN_INT (i), const0_rtx));
3174 return 1;
3177 /* Calculate a few attributes that may be useful for specific
3178 optimizations. */
3179 /* Count number of leading zeros. */
3180 for (i = 31; i >= 0; i--)
3182 if ((remainder & (1 << i)) == 0)
3183 clear_sign_bit_copies++;
3184 else
3185 break;
3188 /* Count number of leading 1's. */
3189 for (i = 31; i >= 0; i--)
3191 if ((remainder & (1 << i)) != 0)
3192 set_sign_bit_copies++;
3193 else
3194 break;
3197 /* Count number of trailing zero's. */
3198 for (i = 0; i <= 31; i++)
3200 if ((remainder & (1 << i)) == 0)
3201 clear_zero_bit_copies++;
3202 else
3203 break;
3206 /* Count number of trailing 1's. */
3207 for (i = 0; i <= 31; i++)
3209 if ((remainder & (1 << i)) != 0)
3210 set_zero_bit_copies++;
3211 else
3212 break;
3215 switch (code)
3217 case SET:
3218 /* See if we can do this by sign_extending a constant that is known
3219 to be negative. This is a good way of doing it, since the shift
3220 may well merge into a subsequent insn. */
3221 if (set_sign_bit_copies > 1)
3223 if (const_ok_for_arm
3224 (temp1 = ARM_SIGN_EXTEND (remainder
3225 << (set_sign_bit_copies - 1))))
3227 if (generate)
3229 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3230 emit_constant_insn (cond,
3231 gen_rtx_SET (VOIDmode, new_src,
3232 GEN_INT (temp1)));
3233 emit_constant_insn (cond,
3234 gen_ashrsi3 (target, new_src,
3235 GEN_INT (set_sign_bit_copies - 1)));
3237 return 2;
3239 /* For an inverted constant, we will need to set the low bits,
3240 these will be shifted out of harm's way. */
3241 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3242 if (const_ok_for_arm (~temp1))
3244 if (generate)
3246 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3247 emit_constant_insn (cond,
3248 gen_rtx_SET (VOIDmode, new_src,
3249 GEN_INT (temp1)));
3250 emit_constant_insn (cond,
3251 gen_ashrsi3 (target, new_src,
3252 GEN_INT (set_sign_bit_copies - 1)));
3254 return 2;
3258 /* See if we can calculate the value as the difference between two
3259 valid immediates. */
3260 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3262 int topshift = clear_sign_bit_copies & ~1;
3264 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3265 & (0xff000000 >> topshift));
3267 /* If temp1 is zero, then that means the 9 most significant
3268 bits of remainder were 1 and we've caused it to overflow.
3269 When topshift is 0 we don't need to do anything since we
3270 can borrow from 'bit 32'. */
3271 if (temp1 == 0 && topshift != 0)
3272 temp1 = 0x80000000 >> (topshift - 1);
3274 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3276 if (const_ok_for_arm (temp2))
3278 if (generate)
3280 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3281 emit_constant_insn (cond,
3282 gen_rtx_SET (VOIDmode, new_src,
3283 GEN_INT (temp1)));
3284 emit_constant_insn (cond,
3285 gen_addsi3 (target, new_src,
3286 GEN_INT (-temp2)));
3289 return 2;
3293 /* See if we can generate this by setting the bottom (or the top)
3294 16 bits, and then shifting these into the other half of the
3295 word. We only look for the simplest cases; to do more would cost
3296 too much. Be careful, however, not to generate this when the
3297 alternative would take fewer insns. */
3298 if (val & 0xffff0000)
3300 temp1 = remainder & 0xffff0000;
3301 temp2 = remainder & 0x0000ffff;
3303 /* Overlaps outside this range are best done using other methods. */
3304 for (i = 9; i < 24; i++)
3306 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3307 && !const_ok_for_arm (temp2))
3309 rtx new_src = (subtargets
3310 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3311 : target);
3312 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3313 source, subtargets, generate);
3314 source = new_src;
3315 if (generate)
3316 emit_constant_insn
3317 (cond,
3318 gen_rtx_SET
3319 (VOIDmode, target,
3320 gen_rtx_IOR (mode,
3321 gen_rtx_ASHIFT (mode, source,
3322 GEN_INT (i)),
3323 source)));
3324 return insns + 1;
3328 /* Don't duplicate cases already considered. */
3329 for (i = 17; i < 24; i++)
3331 if (((temp1 | (temp1 >> i)) == remainder)
3332 && !const_ok_for_arm (temp1))
3334 rtx new_src = (subtargets
3335 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3336 : target);
3337 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3338 source, subtargets, generate);
3339 source = new_src;
3340 if (generate)
3341 emit_constant_insn
3342 (cond,
3343 gen_rtx_SET (VOIDmode, target,
3344 gen_rtx_IOR
3345 (mode,
3346 gen_rtx_LSHIFTRT (mode, source,
3347 GEN_INT (i)),
3348 source)));
3349 return insns + 1;
3353 break;
3355 case IOR:
3356 case XOR:
3357 /* If we have IOR or XOR, and the constant can be loaded in a
3358 single instruction, and we can find a temporary to put it in,
3359 then this can be done in two instructions instead of 3-4. */
3360 if (subtargets
3361 /* TARGET can't be NULL if SUBTARGETS is 0 */
3362 || (reload_completed && !reg_mentioned_p (target, source)))
3364 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3366 if (generate)
3368 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3370 emit_constant_insn (cond,
3371 gen_rtx_SET (VOIDmode, sub,
3372 GEN_INT (val)));
3373 emit_constant_insn (cond,
3374 gen_rtx_SET (VOIDmode, target,
3375 gen_rtx_fmt_ee (code, mode,
3376 source, sub)));
3378 return 2;
3382 if (code == XOR)
3383 break;
3385 /* Convert.
3386 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
3387 and the remainder 0s for e.g. 0xfff00000)
3388 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3390 This can be done in 2 instructions by using shifts with mov or mvn.
3391 e.g. for
3392 x = x | 0xfff00000;
3393 we generate.
3394 mvn r0, r0, asl #12
3395 mvn r0, r0, lsr #12 */
3396 if (set_sign_bit_copies > 8
3397 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3399 if (generate)
3401 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3402 rtx shift = GEN_INT (set_sign_bit_copies);
3404 emit_constant_insn
3405 (cond,
3406 gen_rtx_SET (VOIDmode, sub,
3407 gen_rtx_NOT (mode,
3408 gen_rtx_ASHIFT (mode,
3409 source,
3410 shift))));
3411 emit_constant_insn
3412 (cond,
3413 gen_rtx_SET (VOIDmode, target,
3414 gen_rtx_NOT (mode,
3415 gen_rtx_LSHIFTRT (mode, sub,
3416 shift))));
3418 return 2;
3421 /* Convert
3422 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3424 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3426 E.g. for r0 = r0 | 0xfff we generate
3427 mvn r0, r0, lsr #12
3428 mvn r0, r0, asl #12
3431 if (set_zero_bit_copies > 8
3432 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3434 if (generate)
3436 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3437 rtx shift = GEN_INT (set_zero_bit_copies);
3439 emit_constant_insn
3440 (cond,
3441 gen_rtx_SET (VOIDmode, sub,
3442 gen_rtx_NOT (mode,
3443 gen_rtx_LSHIFTRT (mode,
3444 source,
3445 shift))));
3446 emit_constant_insn
3447 (cond,
3448 gen_rtx_SET (VOIDmode, target,
3449 gen_rtx_NOT (mode,
3450 gen_rtx_ASHIFT (mode, sub,
3451 shift))));
3453 return 2;
3456 /* This will never be reached for Thumb2 because orn is a valid
3457 instruction. This is for Thumb1 and the 32-bit ARM cases.
3459 x = y | constant (such that ~constant is a valid constant)
3460 Transform this to
3461 x = ~(~y & ~constant). */
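/* This is just De Morgan: y | c == ~(~y & ~c).  Illustrative case: for
   c = 0xffff00ff (no useful run of leading or trailing 1s), ~c = 0x0000ff00
   is a valid ARM immediate, so we emit mvn / and #0xff00 / mvn -- three
   instructions with no constant-pool load.  */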
3463 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3465 if (generate)
3467 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3468 emit_constant_insn (cond,
3469 gen_rtx_SET (VOIDmode, sub,
3470 gen_rtx_NOT (mode, source)));
3471 source = sub;
3472 if (subtargets)
3473 sub = gen_reg_rtx (mode);
3474 emit_constant_insn (cond,
3475 gen_rtx_SET (VOIDmode, sub,
3476 gen_rtx_AND (mode, source,
3477 GEN_INT (temp1))));
3478 emit_constant_insn (cond,
3479 gen_rtx_SET (VOIDmode, target,
3480 gen_rtx_NOT (mode, sub)));
3482 return 3;
3484 break;
3486 case AND:
3487 /* See if two shifts will do 2 or more insns' worth of work. */
3488 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3490 HOST_WIDE_INT shift_mask = ((0xffffffff
3491 << (32 - clear_sign_bit_copies))
3492 & 0xffffffff);
3494 if ((remainder | shift_mask) != 0xffffffff)
3496 if (generate)
3498 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3499 insns = arm_gen_constant (AND, mode, cond,
3500 remainder | shift_mask,
3501 new_src, source, subtargets, 1);
3502 source = new_src;
3504 else
3506 rtx targ = subtargets ? NULL_RTX : target;
3507 insns = arm_gen_constant (AND, mode, cond,
3508 remainder | shift_mask,
3509 targ, source, subtargets, 0);
3513 if (generate)
3515 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3516 rtx shift = GEN_INT (clear_sign_bit_copies);
3518 emit_insn (gen_ashlsi3 (new_src, source, shift));
3519 emit_insn (gen_lshrsi3 (target, new_src, shift));
3522 return insns + 2;
3525 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3527 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3529 if ((remainder | shift_mask) != 0xffffffff)
3531 if (generate)
3533 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3535 insns = arm_gen_constant (AND, mode, cond,
3536 remainder | shift_mask,
3537 new_src, source, subtargets, 1);
3538 source = new_src;
3540 else
3542 rtx targ = subtargets ? NULL_RTX : target;
3544 insns = arm_gen_constant (AND, mode, cond,
3545 remainder | shift_mask,
3546 targ, source, subtargets, 0);
3550 if (generate)
3552 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3553 rtx shift = GEN_INT (clear_zero_bit_copies);
3555 emit_insn (gen_lshrsi3 (new_src, source, shift));
3556 emit_insn (gen_ashlsi3 (target, new_src, shift));
3559 return insns + 2;
3562 break;
3564 default:
3565 break;
3568 /* Calculate what the instruction sequences would be if we generated it
3569 normally, negated, or inverted. */
3570 if (code == AND)
3571 /* AND cannot be split into multiple insns, so invert and use BIC. */
3572 insns = 99;
3573 else
3574 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3576 if (can_negate)
3577 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3578 &neg_immediates);
3579 else
3580 neg_insns = 99;
3582 if (can_invert || final_invert)
3583 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3584 &inv_immediates);
3585 else
3586 inv_insns = 99;
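/* Illustrative example of the choice being made here: for code == AND with
   remainder == 0xffffff00 the positive form was forced to 99 above, while
   ~remainder == 0xff needs only one immediate, so the inverted sequence is
   chosen and the operation is ultimately matched as a single
   bic rd, rn, #255.  */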
3588 immediates = &pos_immediates;
3590 /* Is the negated immediate sequence more efficient? */
3591 if (neg_insns < insns && neg_insns <= inv_insns)
3593 insns = neg_insns;
3594 immediates = &neg_immediates;
3596 else
3597 can_negate = 0;
3599 /* Is the inverted immediate sequence more efficient?
3600 We must allow for an extra NOT instruction for XOR operations, although
3601 there is some chance that the final 'mvn' will get optimized later. */
3602 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3604 insns = inv_insns;
3605 immediates = &inv_immediates;
3607 else
3609 can_invert = 0;
3610 final_invert = 0;
3613 /* Now output the chosen sequence as instructions. */
3614 if (generate)
3616 for (i = 0; i < insns; i++)
3618 rtx new_src, temp1_rtx;
3620 temp1 = immediates->i[i];
3622 if (code == SET || code == MINUS)
3623 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3624 else if ((final_invert || i < (insns - 1)) && subtargets)
3625 new_src = gen_reg_rtx (mode);
3626 else
3627 new_src = target;
3629 if (can_invert)
3630 temp1 = ~temp1;
3631 else if (can_negate)
3632 temp1 = -temp1;
3634 temp1 = trunc_int_for_mode (temp1, mode);
3635 temp1_rtx = GEN_INT (temp1);
3637 if (code == SET)
3639 else if (code == MINUS)
3640 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3641 else
3642 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3644 emit_constant_insn (cond,
3645 gen_rtx_SET (VOIDmode, new_src,
3646 temp1_rtx));
3647 source = new_src;
3649 if (code == SET)
3651 can_negate = can_invert;
3652 can_invert = 0;
3653 code = PLUS;
3655 else if (code == MINUS)
3656 code = PLUS;
3660 if (final_invert)
3662 if (generate)
3663 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3664 gen_rtx_NOT (mode, source)));
3665 insns++;
3668 return insns;
3671 /* Canonicalize a comparison so that we are more likely to recognize it.
3672 This can be done for a few constant compares, where we can make the
3673 immediate value easier to load. */
3675 static void
3676 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
3677 bool op0_preserve_value)
3679 enum machine_mode mode;
3680 unsigned HOST_WIDE_INT i, maxval;
3682 mode = GET_MODE (*op0);
3683 if (mode == VOIDmode)
3684 mode = GET_MODE (*op1);
3686 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3688 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3689 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3690 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3691 for GTU/LEU in Thumb mode. */
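/* Concrete (illustrative) case: (GT:DI x 5) has no direct comparison form,
   so it is rewritten as (GE:DI x 6) when the constant 6 can be built from
   two valid immediates; only if that fails (and OP0 need not be preserved)
   do we swap the operands and use the reversed condition.  */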
3692 if (mode == DImode)
3694 rtx tem;
3696 if (*code == GT || *code == LE
3697 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
3699 /* Missing comparison. First try to use an available
3700 comparison. */
3701 if (CONST_INT_P (*op1))
3703 i = INTVAL (*op1);
3704 switch (*code)
3706 case GT:
3707 case LE:
3708 if (i != maxval
3709 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3711 *op1 = GEN_INT (i + 1);
3712 *code = *code == GT ? GE : LT;
3713 return;
3715 break;
3716 case GTU:
3717 case LEU:
3718 if (i != ~((unsigned HOST_WIDE_INT) 0)
3719 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3721 *op1 = GEN_INT (i + 1);
3722 *code = *code == GTU ? GEU : LTU;
3723 return;
3725 break;
3726 default:
3727 gcc_unreachable ();
3731 /* If that did not work, reverse the condition. */
3732 if (!op0_preserve_value)
3734 tem = *op0;
3735 *op0 = *op1;
3736 *op1 = tem;
3737 *code = (int)swap_condition ((enum rtx_code)*code);
3740 return;
3743 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3744 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3745 to facilitate possible combining with a cmp into 'ands'. */
3746 if (mode == SImode
3747 && GET_CODE (*op0) == ZERO_EXTEND
3748 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3749 && GET_MODE (XEXP (*op0, 0)) == QImode
3750 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3751 && subreg_lowpart_p (XEXP (*op0, 0))
3752 && *op1 == const0_rtx)
3753 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3754 GEN_INT (255));
3756 /* Comparisons smaller than DImode. Only adjust comparisons against
3757 an out-of-range constant. */
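/* For example (illustrative): 0xfff is not a valid ARM immediate, so
   (GT x 0xfff) is rewritten below as (GE x 0x1000); 0x1000 is a single
   rotated byte, which cmp can encode directly.  */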
3758 if (!CONST_INT_P (*op1)
3759 || const_ok_for_arm (INTVAL (*op1))
3760 || const_ok_for_arm (- INTVAL (*op1)))
3761 return;
3763 i = INTVAL (*op1);
3765 switch (*code)
3767 case EQ:
3768 case NE:
3769 return;
3771 case GT:
3772 case LE:
3773 if (i != maxval
3774 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3776 *op1 = GEN_INT (i + 1);
3777 *code = *code == GT ? GE : LT;
3778 return;
3780 break;
3782 case GE:
3783 case LT:
3784 if (i != ~maxval
3785 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3787 *op1 = GEN_INT (i - 1);
3788 *code = *code == GE ? GT : LE;
3789 return;
3791 break;
3793 case GTU:
3794 case LEU:
3795 if (i != ~((unsigned HOST_WIDE_INT) 0)
3796 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3798 *op1 = GEN_INT (i + 1);
3799 *code = *code == GTU ? GEU : LTU;
3800 return;
3802 break;
3804 case GEU:
3805 case LTU:
3806 if (i != 0
3807 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3809 *op1 = GEN_INT (i - 1);
3810 *code = *code == GEU ? GTU : LEU;
3811 return;
3813 break;
3815 default:
3816 gcc_unreachable ();
3821 /* Define how to find the value returned by a function. */
3823 static rtx
3824 arm_function_value(const_tree type, const_tree func,
3825 bool outgoing ATTRIBUTE_UNUSED)
3827 enum machine_mode mode;
3828 int unsignedp ATTRIBUTE_UNUSED;
3829 rtx r ATTRIBUTE_UNUSED;
3831 mode = TYPE_MODE (type);
3833 if (TARGET_AAPCS_BASED)
3834 return aapcs_allocate_return_reg (mode, type, func);
3836 /* Promote integer types. */
3837 if (INTEGRAL_TYPE_P (type))
3838 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3840 /* Promote small structs returned in a register to full-word size
3841 for big-endian AAPCS. */
3842 if (arm_return_in_msb (type))
3844 HOST_WIDE_INT size = int_size_in_bytes (type);
3845 if (size % UNITS_PER_WORD != 0)
3847 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3848 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3852 return arm_libcall_value_1 (mode);
3855 static int
3856 libcall_eq (const void *p1, const void *p2)
3858 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3861 static hashval_t
3862 libcall_hash (const void *p1)
3864 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3867 static void
3868 add_libcall (htab_t htab, rtx libcall)
3870 *htab_find_slot (htab, libcall, INSERT) = libcall;
3873 static bool
3874 arm_libcall_uses_aapcs_base (const_rtx libcall)
3876 static bool init_done = false;
3877 static htab_t libcall_htab;
3879 if (!init_done)
3881 init_done = true;
3883 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3884 NULL);
3885 add_libcall (libcall_htab,
3886 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3887 add_libcall (libcall_htab,
3888 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3889 add_libcall (libcall_htab,
3890 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3891 add_libcall (libcall_htab,
3892 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3894 add_libcall (libcall_htab,
3895 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3896 add_libcall (libcall_htab,
3897 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3898 add_libcall (libcall_htab,
3899 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3900 add_libcall (libcall_htab,
3901 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3903 add_libcall (libcall_htab,
3904 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3905 add_libcall (libcall_htab,
3906 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3907 add_libcall (libcall_htab,
3908 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3909 add_libcall (libcall_htab,
3910 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3911 add_libcall (libcall_htab,
3912 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3913 add_libcall (libcall_htab,
3914 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3915 add_libcall (libcall_htab,
3916 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3917 add_libcall (libcall_htab,
3918 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3920 /* Values from double-precision helper functions are returned in core
3921 registers if the selected core only supports single-precision
3922 arithmetic, even if we are using the hard-float ABI. The same is
3923 true for single-precision helpers, but we will never be using the
3924 hard-float ABI on a CPU which doesn't support single-precision
3925 operations in hardware. */
3926 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3927 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3928 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3929 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3930 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3931 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3932 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3933 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3934 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3935 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3936 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3937 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3938 SFmode));
3939 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3940 DFmode));
3943 return libcall && htab_find (libcall_htab, libcall) != NULL;
3946 static rtx
3947 arm_libcall_value_1 (enum machine_mode mode)
3949 if (TARGET_AAPCS_BASED)
3950 return aapcs_libcall_value (mode);
3951 else if (TARGET_IWMMXT_ABI
3952 && arm_vector_mode_supported_p (mode))
3953 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3954 else
3955 return gen_rtx_REG (mode, ARG_REGISTER (1));
3958 /* Define how to find the value returned by a library function
3959 assuming the value has mode MODE. */
3961 static rtx
3962 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3964 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3965 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3967 /* The following libcalls return their result in integer registers,
3968 even though they return a floating point value. */
3969 if (arm_libcall_uses_aapcs_base (libcall))
3970 return gen_rtx_REG (mode, ARG_REGISTER(1));
3974 return arm_libcall_value_1 (mode);
3977 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3979 static bool
3980 arm_function_value_regno_p (const unsigned int regno)
3982 if (regno == ARG_REGISTER (1)
3983 || (TARGET_32BIT
3984 && TARGET_AAPCS_BASED
3985 && TARGET_VFP
3986 && TARGET_HARD_FLOAT
3987 && regno == FIRST_VFP_REGNUM)
3988 || (TARGET_IWMMXT_ABI
3989 && regno == FIRST_IWMMXT_REGNUM))
3990 return true;
3992 return false;
3995 /* Determine the amount of memory needed to store the possible return
3996 registers of an untyped call. */
3997 int
3998 arm_apply_result_size (void)
4000 int size = 16;
4002 if (TARGET_32BIT)
4004 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4005 size += 32;
4006 if (TARGET_IWMMXT_ABI)
4007 size += 8;
4010 return size;
4013 /* Decide whether TYPE should be returned in memory (true)
4014 or in a register (false). FNTYPE is the type of the function making
4015 the call. */
4016 static bool
4017 arm_return_in_memory (const_tree type, const_tree fntype)
4019 HOST_WIDE_INT size;
4021 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4023 if (TARGET_AAPCS_BASED)
4025 /* Simple, non-aggregate types (i.e. not including vectors and
4026 complex) are always returned in a register (or registers).
4027 We don't care about which register here, so we can short-cut
4028 some of the detail. */
4029 if (!AGGREGATE_TYPE_P (type)
4030 && TREE_CODE (type) != VECTOR_TYPE
4031 && TREE_CODE (type) != COMPLEX_TYPE)
4032 return false;
4034 /* Any return value that is no larger than one word can be
4035 returned in r0. */
4036 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4037 return false;
4039 /* Check any available co-processors to see if they accept the
4040 type as a register candidate (VFP, for example, can return
4041 some aggregates in consecutive registers). These aren't
4042 available if the call is variadic. */
4043 if (aapcs_select_return_coproc (type, fntype) >= 0)
4044 return false;
4046 /* Vector values should be returned using ARM registers, not
4047 memory (unless they're over 16 bytes, which will break since
4048 we only have four call-clobbered registers to play with). */
4049 if (TREE_CODE (type) == VECTOR_TYPE)
4050 return (size < 0 || size > (4 * UNITS_PER_WORD));
4052 /* The rest go in memory. */
4053 return true;
4056 if (TREE_CODE (type) == VECTOR_TYPE)
4057 return (size < 0 || size > (4 * UNITS_PER_WORD));
4059 if (!AGGREGATE_TYPE_P (type) &&
4060 (TREE_CODE (type) != VECTOR_TYPE))
4061 /* All simple types are returned in registers. */
4062 return false;
4064 if (arm_abi != ARM_ABI_APCS)
4066 /* ATPCS and later return aggregate types in memory only if they are
4067 larger than a word (or are variable size). */
4068 return (size < 0 || size > UNITS_PER_WORD);
4071 /* For the arm-wince targets we choose to be compatible with Microsoft's
4072 ARM and Thumb compilers, which always return aggregates in memory. */
4073 #ifndef ARM_WINCE
4074 /* All structures/unions bigger than one word are returned in memory.
4075 Also catch the case where int_size_in_bytes returns -1. In this case
4076 the aggregate is either huge or of variable size, and in either case
4077 we will want to return it via memory and not in a register. */
4078 if (size < 0 || size > UNITS_PER_WORD)
4079 return true;
4081 if (TREE_CODE (type) == RECORD_TYPE)
4083 tree field;
4085 /* For a struct the APCS says that we only return in a register
4086 if the type is 'integer like' and every addressable element
4087 has an offset of zero. For practical purposes this means
4088 that the structure can have at most one non-bit-field element
4089 and that this element must be the first one in the structure. */
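/* Illustrative consequences of this APCS rule: struct { int x; } and
   struct { int x : 16; int y : 16; } come back in r0, whereas
   struct { short a; short b; } goes in memory because b is an addressable
   second element, even though the whole thing fits in a word.  */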
4091 /* Find the first field, ignoring non FIELD_DECL things which will
4092 have been created by C++. */
4093 for (field = TYPE_FIELDS (type);
4094 field && TREE_CODE (field) != FIELD_DECL;
4095 field = DECL_CHAIN (field))
4096 continue;
4098 if (field == NULL)
4099 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4101 /* Check that the first field is valid for returning in a register. */
4103 /* ... Floats are not allowed */
4104 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4105 return true;
4107 /* ... Aggregates that are not themselves valid for returning in
4108 a register are not allowed. */
4109 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4110 return true;
4112 /* Now check the remaining fields, if any. Only bitfields are allowed,
4113 since they are not addressable. */
4114 for (field = DECL_CHAIN (field);
4115 field;
4116 field = DECL_CHAIN (field))
4118 if (TREE_CODE (field) != FIELD_DECL)
4119 continue;
4121 if (!DECL_BIT_FIELD_TYPE (field))
4122 return true;
4125 return false;
4128 if (TREE_CODE (type) == UNION_TYPE)
4130 tree field;
4132 /* Unions can be returned in registers if every element is
4133 integral, or can be returned in an integer register. */
4134 for (field = TYPE_FIELDS (type);
4135 field;
4136 field = DECL_CHAIN (field))
4138 if (TREE_CODE (field) != FIELD_DECL)
4139 continue;
4141 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4142 return true;
4144 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4145 return true;
4148 return false;
4150 #endif /* not ARM_WINCE */
4152 /* Return all other types in memory. */
4153 return true;
4156 const struct pcs_attribute_arg
4158 const char *arg;
4159 enum arm_pcs value;
4160 } pcs_attribute_args[] =
4162 {"aapcs", ARM_PCS_AAPCS},
4163 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4164 #if 0
4165 /* We could recognize these, but changes would be needed elsewhere
4166 * to implement them. */
4167 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4168 {"atpcs", ARM_PCS_ATPCS},
4169 {"apcs", ARM_PCS_APCS},
4170 #endif
4171 {NULL, ARM_PCS_UNKNOWN}
4174 static enum arm_pcs
4175 arm_pcs_from_attribute (tree attr)
4177 const struct pcs_attribute_arg *ptr;
4178 const char *arg;
4180 /* Get the value of the argument. */
4181 if (TREE_VALUE (attr) == NULL_TREE
4182 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4183 return ARM_PCS_UNKNOWN;
4185 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4187 /* Check it against the list of known arguments. */
4188 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4189 if (streq (arg, ptr->arg))
4190 return ptr->value;
4192 /* An unrecognized PCS variant. */
4193 return ARM_PCS_UNKNOWN;
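/* For reference, the attribute being parsed here looks like (illustrative):

   double dot (const float *a, const float *b, int n)
     __attribute__ ((pcs ("aapcs-vfp")));

   i.e. the string argument must match one of the entries in
   pcs_attribute_args above.  */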
4196 /* Get the PCS variant to use for this call. TYPE is the function's type
4197 specification, DECL is the specific declaration. DECL may be null if
4198 the call could be indirect or if this is a library call. */
4199 static enum arm_pcs
4200 arm_get_pcs_model (const_tree type, const_tree decl)
4202 bool user_convention = false;
4203 enum arm_pcs user_pcs = arm_pcs_default;
4204 tree attr;
4206 gcc_assert (type);
4208 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4209 if (attr)
4211 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4212 user_convention = true;
4215 if (TARGET_AAPCS_BASED)
4217 /* Detect varargs functions. These always use the base rules
4218 (no argument is ever a candidate for a co-processor
4219 register). */
4220 bool base_rules = stdarg_p (type);
4222 if (user_convention)
4224 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4225 sorry ("non-AAPCS derived PCS variant");
4226 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4227 error ("variadic functions must use the base AAPCS variant");
4230 if (base_rules)
4231 return ARM_PCS_AAPCS;
4232 else if (user_convention)
4233 return user_pcs;
4234 else if (decl && flag_unit_at_a_time)
4236 /* Local functions never leak outside this compilation unit,
4237 so we are free to use whatever conventions are
4238 appropriate. */
4239 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4240 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4241 if (i && i->local)
4242 return ARM_PCS_AAPCS_LOCAL;
4245 else if (user_convention && user_pcs != arm_pcs_default)
4246 sorry ("PCS variant");
4248 /* For everything else we use the target's default. */
4249 return arm_pcs_default;
4253 static void
4254 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4255 const_tree fntype ATTRIBUTE_UNUSED,
4256 rtx libcall ATTRIBUTE_UNUSED,
4257 const_tree fndecl ATTRIBUTE_UNUSED)
4259 /* Record the unallocated VFP registers. */
4260 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4261 pcum->aapcs_vfp_reg_alloc = 0;
4264 /* Walk down the type tree of TYPE counting consecutive base elements.
4265 If *MODEP is VOIDmode, then set it to the first valid floating point
4266 type. If a non-floating point type is found, or if a floating point
4267 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4268 otherwise return the count in the sub-tree. */
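/* Illustrative inputs and results (hand-worked, not from the ABI tests):
   struct { double r, i; }        -> *MODEP = DFmode, returns 2
   float v[4]                     -> *MODEP = SFmode, returns 4
   struct { float f; double d; }  -> returns -1 (mixed base types)  */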
4269 static int
4270 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4272 enum machine_mode mode;
4273 HOST_WIDE_INT size;
4275 switch (TREE_CODE (type))
4277 case REAL_TYPE:
4278 mode = TYPE_MODE (type);
4279 if (mode != DFmode && mode != SFmode)
4280 return -1;
4282 if (*modep == VOIDmode)
4283 *modep = mode;
4285 if (*modep == mode)
4286 return 1;
4288 break;
4290 case COMPLEX_TYPE:
4291 mode = TYPE_MODE (TREE_TYPE (type));
4292 if (mode != DFmode && mode != SFmode)
4293 return -1;
4295 if (*modep == VOIDmode)
4296 *modep = mode;
4298 if (*modep == mode)
4299 return 2;
4301 break;
4303 case VECTOR_TYPE:
4304 /* Use V2SImode and V4SImode as representatives of all 64-bit
4305 and 128-bit vector types, whether or not those modes are
4306 supported with the present options. */
4307 size = int_size_in_bytes (type);
4308 switch (size)
4310 case 8:
4311 mode = V2SImode;
4312 break;
4313 case 16:
4314 mode = V4SImode;
4315 break;
4316 default:
4317 return -1;
4320 if (*modep == VOIDmode)
4321 *modep = mode;
4323 /* Vector modes are considered to be opaque: two vectors are
4324 equivalent for the purposes of being homogeneous aggregates
4325 if they are the same size. */
4326 if (*modep == mode)
4327 return 1;
4329 break;
4331 case ARRAY_TYPE:
4333 int count;
4334 tree index = TYPE_DOMAIN (type);
4336 /* Can't handle incomplete types. */
4337 if (!COMPLETE_TYPE_P (type))
4338 return -1;
4340 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4341 if (count == -1
4342 || !index
4343 || !TYPE_MAX_VALUE (index)
4344 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4345 || !TYPE_MIN_VALUE (index)
4346 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4347 || count < 0)
4348 return -1;
4350 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4351 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4353 /* There must be no padding. */
4354 if (!host_integerp (TYPE_SIZE (type), 1)
4355 || (tree_low_cst (TYPE_SIZE (type), 1)
4356 != count * GET_MODE_BITSIZE (*modep)))
4357 return -1;
4359 return count;
4362 case RECORD_TYPE:
4364 int count = 0;
4365 int sub_count;
4366 tree field;
4368 /* Can't handle incomplete types. */
4369 if (!COMPLETE_TYPE_P (type))
4370 return -1;
4372 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4374 if (TREE_CODE (field) != FIELD_DECL)
4375 continue;
4377 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4378 if (sub_count < 0)
4379 return -1;
4380 count += sub_count;
4383 /* There must be no padding. */
4384 if (!host_integerp (TYPE_SIZE (type), 1)
4385 || (tree_low_cst (TYPE_SIZE (type), 1)
4386 != count * GET_MODE_BITSIZE (*modep)))
4387 return -1;
4389 return count;
4392 case UNION_TYPE:
4393 case QUAL_UNION_TYPE:
4395 /* These aren't very interesting except in a degenerate case. */
4396 int count = 0;
4397 int sub_count;
4398 tree field;
4400 /* Can't handle incomplete types. */
4401 if (!COMPLETE_TYPE_P (type))
4402 return -1;
4404 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4406 if (TREE_CODE (field) != FIELD_DECL)
4407 continue;
4409 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4410 if (sub_count < 0)
4411 return -1;
4412 count = count > sub_count ? count : sub_count;
4415 /* There must be no padding. */
4416 if (!host_integerp (TYPE_SIZE (type), 1)
4417 || (tree_low_cst (TYPE_SIZE (type), 1)
4418 != count * GET_MODE_BITSIZE (*modep)))
4419 return -1;
4421 return count;
4424 default:
4425 break;
4428 return -1;
4431 /* Return true if PCS_VARIANT should use VFP registers. */
4432 static bool
4433 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4435 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4437 static bool seen_thumb1_vfp = false;
4439 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4441 sorry ("Thumb-1 hard-float VFP ABI");
4442 /* sorry() is not immediately fatal, so only display this once. */
4443 seen_thumb1_vfp = true;
4446 return true;
4449 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4450 return false;
4452 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4453 (TARGET_VFP_DOUBLE || !is_double));
4456 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4457 suitable for passing or returning in VFP registers for the PCS
4458 variant selected. If it is, then *BASE_MODE is updated to contain
4459 a machine mode describing each element of the argument's type and
4460 *COUNT to hold the number of such elements. */
4461 static bool
4462 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4463 enum machine_mode mode, const_tree type,
4464 enum machine_mode *base_mode, int *count)
4466 enum machine_mode new_mode = VOIDmode;
4468 /* If we have the type information, prefer that to working things
4469 out from the mode. */
4470 if (type)
4472 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4474 if (ag_count > 0 && ag_count <= 4)
4475 *count = ag_count;
4476 else
4477 return false;
4479 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4480 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4481 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4483 *count = 1;
4484 new_mode = mode;
4486 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4488 *count = 2;
4489 new_mode = (mode == DCmode ? DFmode : SFmode);
4491 else
4492 return false;
4495 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4496 return false;
4498 *base_mode = new_mode;
4499 return true;
4502 static bool
4503 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4504 enum machine_mode mode, const_tree type)
4506 int count ATTRIBUTE_UNUSED;
4507 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4509 if (!use_vfp_abi (pcs_variant, false))
4510 return false;
4511 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4512 &ag_mode, &count);
4515 static bool
4516 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4517 const_tree type)
4519 if (!use_vfp_abi (pcum->pcs_variant, false))
4520 return false;
4522 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4523 &pcum->aapcs_vfp_rmode,
4524 &pcum->aapcs_vfp_rcount);
4527 static bool
4528 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4529 const_tree type ATTRIBUTE_UNUSED)
4531 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4532 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4533 int regno;
4535 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4536 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4538 pcum->aapcs_vfp_reg_alloc = mask << regno;
4539 if (mode == BLKmode
4540 || (mode == TImode && ! TARGET_NEON)
4541 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
4543 int i;
4544 int rcount = pcum->aapcs_vfp_rcount;
4545 int rshift = shift;
4546 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4547 rtx par;
4548 if (!TARGET_NEON)
4550 /* Avoid using unsupported vector modes. */
4551 if (rmode == V2SImode)
4552 rmode = DImode;
4553 else if (rmode == V4SImode)
4555 rmode = DImode;
4556 rcount *= 2;
4557 rshift /= 2;
4560 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4561 for (i = 0; i < rcount; i++)
4563 rtx tmp = gen_rtx_REG (rmode,
4564 FIRST_VFP_REGNUM + regno + i * rshift);
4565 tmp = gen_rtx_EXPR_LIST
4566 (VOIDmode, tmp,
4567 GEN_INT (i * GET_MODE_SIZE (rmode)));
4568 XVECEXP (par, 0, i) = tmp;
4571 pcum->aapcs_reg = par;
4573 else
4574 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4575 return true;
4577 return false;
4580 static rtx
4581 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4582 enum machine_mode mode,
4583 const_tree type ATTRIBUTE_UNUSED)
4585 if (!use_vfp_abi (pcs_variant, false))
4586 return NULL;
4588 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4590 int count;
4591 enum machine_mode ag_mode;
4592 int i;
4593 rtx par;
4594 int shift;
4596 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4597 &ag_mode, &count);
4599 if (!TARGET_NEON)
4601 if (ag_mode == V2SImode)
4602 ag_mode = DImode;
4603 else if (ag_mode == V4SImode)
4605 ag_mode = DImode;
4606 count *= 2;
4609 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4610 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4611 for (i = 0; i < count; i++)
4613 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4614 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4615 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4616 XVECEXP (par, 0, i) = tmp;
4619 return par;
4622 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4625 static void
4626 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4627 enum machine_mode mode ATTRIBUTE_UNUSED,
4628 const_tree type ATTRIBUTE_UNUSED)
4630 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4631 pcum->aapcs_vfp_reg_alloc = 0;
4632 return;
4635 #define AAPCS_CP(X) \
4637 aapcs_ ## X ## _cum_init, \
4638 aapcs_ ## X ## _is_call_candidate, \
4639 aapcs_ ## X ## _allocate, \
4640 aapcs_ ## X ## _is_return_candidate, \
4641 aapcs_ ## X ## _allocate_return_reg, \
4642 aapcs_ ## X ## _advance \
4645 /* Table of co-processors that can be used to pass arguments in
4646 registers. Ideally no argument should be a candidate for more than
4647 one co-processor table entry, but the table is processed in order
4648 and stops after the first match. If that entry then fails to put
4649 the argument into a co-processor register, the argument will go on
4650 the stack. */
4651 static struct
4653 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4654 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4656 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4657 BLKmode) is a candidate for this co-processor's registers; this
4658 function should ignore any position-dependent state in
4659 CUMULATIVE_ARGS and only use call-type dependent information. */
4660 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4662 /* Return true if the argument does get a co-processor register; it
4663 should set aapcs_reg to an RTX of the register allocated as is
4664 required for a return from FUNCTION_ARG. */
4665 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4667 /* Return true if a result of mode MODE (or type TYPE if MODE is
4668 BLKmode) can be returned in this co-processor's registers. */
4669 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4671 /* Allocate and return an RTX element to hold the return type of a
4672 call; this routine must not fail and will only be called if
4673 is_return_candidate returned true with the same parameters. */
4674 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4676 /* Finish processing this argument and prepare to start processing
4677 the next one. */
4678 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4679 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4681 AAPCS_CP(vfp)
4684 #undef AAPCS_CP
4686 static int
4687 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4688 const_tree type)
4690 int i;
4692 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4693 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4694 return i;
4696 return -1;
4699 static int
4700 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4702 /* We aren't passed a decl, so we can't check that a call is local.
4703 However, it isn't clear that that would be a win anyway, since it
4704 might limit some tail-calling opportunities. */
4705 enum arm_pcs pcs_variant;
4707 if (fntype)
4709 const_tree fndecl = NULL_TREE;
4711 if (TREE_CODE (fntype) == FUNCTION_DECL)
4713 fndecl = fntype;
4714 fntype = TREE_TYPE (fntype);
4717 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4719 else
4720 pcs_variant = arm_pcs_default;
4722 if (pcs_variant != ARM_PCS_AAPCS)
4724 int i;
4726 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4727 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4728 TYPE_MODE (type),
4729 type))
4730 return i;
4732 return -1;
4735 static rtx
4736 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4737 const_tree fntype)
4739 /* We aren't passed a decl, so we can't check that a call is local.
4740 However, it isn't clear that that would be a win anyway, since it
4741 might limit some tail-calling opportunities. */
4742 enum arm_pcs pcs_variant;
4743 int unsignedp ATTRIBUTE_UNUSED;
4745 if (fntype)
4747 const_tree fndecl = NULL_TREE;
4749 if (TREE_CODE (fntype) == FUNCTION_DECL)
4751 fndecl = fntype;
4752 fntype = TREE_TYPE (fntype);
4755 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4757 else
4758 pcs_variant = arm_pcs_default;
4760 /* Promote integer types. */
4761 if (type && INTEGRAL_TYPE_P (type))
4762 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4764 if (pcs_variant != ARM_PCS_AAPCS)
4766 int i;
4768 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4769 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4770 type))
4771 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4772 mode, type);
4775 /* Promote small structs returned in a register to full-word size
4776 for big-endian AAPCS. */
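/* E.g. (illustrative): a 3-byte struct on a big-endian AAPCS target is
   widened to a full word here, so the value comes back as an SImode r0
   with the data in the most-significant bytes.  */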
4777 if (type && arm_return_in_msb (type))
4779 HOST_WIDE_INT size = int_size_in_bytes (type);
4780 if (size % UNITS_PER_WORD != 0)
4782 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4783 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4787 return gen_rtx_REG (mode, R0_REGNUM);
4790 static rtx
4791 aapcs_libcall_value (enum machine_mode mode)
4793 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4794 && GET_MODE_SIZE (mode) <= 4)
4795 mode = SImode;
4797 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4800 /* Lay out a function argument using the AAPCS rules. The rule
4801 numbers referred to here are those in the AAPCS. */
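/* Worked (illustrative) example of the core-register rules below, for the
   base variant and f (int a, double b, int c):
     a -> r0         (C4; NCRN becomes 1)
     b -> r2,r3      (C3 rounds NCRN up to 2, then C4; NCRN becomes 4)
     c -> stack      (C6: NCRN is already 4, and r1 is never back-filled)  */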
4802 static void
4803 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4804 const_tree type, bool named)
4806 int nregs, nregs2;
4807 int ncrn;
4809 /* We only need to do this once per argument. */
4810 if (pcum->aapcs_arg_processed)
4811 return;
4813 pcum->aapcs_arg_processed = true;
4815 /* Special case: if named is false then we are handling an incoming
4816 anonymous argument which is on the stack. */
4817 if (!named)
4818 return;
4820 /* Is this a potential co-processor register candidate? */
4821 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4823 int slot = aapcs_select_call_coproc (pcum, mode, type);
4824 pcum->aapcs_cprc_slot = slot;
4826 /* We don't have to apply any of the rules from part B of the
4827 preparation phase, these are handled elsewhere in the
4828 compiler. */
4830 if (slot >= 0)
4832 /* A co-processor register candidate goes either in its own
4833 class of registers or on the stack. */
4834 if (!pcum->aapcs_cprc_failed[slot])
4836 /* C1.cp - Try to allocate the argument to co-processor
4837 registers. */
4838 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4839 return;
4841 /* C2.cp - Put the argument on the stack and note that we
4842 can't assign any more candidates in this slot. We also
4843 need to note that we have allocated stack space, so that
4844 we won't later try to split a non-cprc candidate between
4845 core registers and the stack. */
4846 pcum->aapcs_cprc_failed[slot] = true;
4847 pcum->can_split = false;
4850 /* We didn't get a register, so this argument goes on the
4851 stack. */
4852 gcc_assert (pcum->can_split == false);
4853 return;
4857 /* C3 - For double-word aligned arguments, round the NCRN up to the
4858 next even number. */
4859 ncrn = pcum->aapcs_ncrn;
4860 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4861 ncrn++;
4863 nregs = ARM_NUM_REGS2(mode, type);
4865 /* Sigh, this test should really assert that nregs > 0, but a GCC
4866 extension allows empty structs and then gives them empty size; it
4867 then allows such a structure to be passed by value. For some of
4868 the code below we have to pretend that such an argument has
4869 non-zero size so that we 'locate' it correctly either in
4870 registers or on the stack. */
4871 gcc_assert (nregs >= 0);
4873 nregs2 = nregs ? nregs : 1;
4875 /* C4 - Argument fits entirely in core registers. */
4876 if (ncrn + nregs2 <= NUM_ARG_REGS)
4878 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4879 pcum->aapcs_next_ncrn = ncrn + nregs;
4880 return;
4883 /* C5 - Some core registers left and there are no arguments already
4884 on the stack: split this argument between the remaining core
4885 registers and the stack. */
4886 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4888 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4889 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4890 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4891 return;
4894 /* C6 - NCRN is set to 4. */
4895 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4897 /* C7, C8 - argument goes on the stack. We have nothing to do here. */
4898 return;
4901 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4902 for a call to a function whose data type is FNTYPE.
4903 For a library call, FNTYPE is NULL. */
4904 void
4905 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4906 rtx libname,
4907 tree fndecl ATTRIBUTE_UNUSED)
4909 /* Long call handling. */
4910 if (fntype)
4911 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4912 else
4913 pcum->pcs_variant = arm_pcs_default;
4915 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4917 if (arm_libcall_uses_aapcs_base (libname))
4918 pcum->pcs_variant = ARM_PCS_AAPCS;
4920 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4921 pcum->aapcs_reg = NULL_RTX;
4922 pcum->aapcs_partial = 0;
4923 pcum->aapcs_arg_processed = false;
4924 pcum->aapcs_cprc_slot = -1;
4925 pcum->can_split = true;
4927 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4929 int i;
4931 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4933 pcum->aapcs_cprc_failed[i] = false;
4934 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4937 return;
4940 /* Legacy ABIs */
4942 /* On the ARM, the offset starts at 0. */
4943 pcum->nregs = 0;
4944 pcum->iwmmxt_nregs = 0;
4945 pcum->can_split = true;
4947 /* Varargs vectors are treated the same as long long.
4948 named_count avoids having to change the way arm handles 'named'. */
4949 pcum->named_count = 0;
4950 pcum->nargs = 0;
4952 if (TARGET_REALLY_IWMMXT && fntype)
4954 tree fn_arg;
4956 for (fn_arg = TYPE_ARG_TYPES (fntype);
4957 fn_arg;
4958 fn_arg = TREE_CHAIN (fn_arg))
4959 pcum->named_count += 1;
4961 if (! pcum->named_count)
4962 pcum->named_count = INT_MAX;
4967 /* Return true if mode/type need doubleword alignment. */
4968 static bool
4969 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4971 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4972 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
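/* E.g. (illustrative) DImode and DFmode arguments, and any type declared
   with __attribute__ ((aligned (8))), answer true here and therefore start
   on an even register number or an 8-byte-aligned stack slot.  */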
4976 /* Determine where to put an argument to a function.
4977 Value is zero to push the argument on the stack,
4978 or a hard register in which to store the argument.
4980 MODE is the argument's machine mode.
4981 TYPE is the data type of the argument (as a tree).
4982 This is null for libcalls where that information may
4983 not be available.
4984 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4985 the preceding args and about the function being called.
4986 NAMED is nonzero if this argument is a named parameter
4987 (otherwise it is an extra parameter matching an ellipsis).
4989 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4990 other arguments are passed on the stack. If (NAMED == 0) (which happens
4991 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4992 defined), say it is passed in the stack (function_prologue will
4993 indeed make it pass in the stack if necessary). */
4995 static rtx
4996 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4997 const_tree type, bool named)
4999 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5000 int nregs;
5002 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5003 a call insn (op3 of a call_value insn). */
5004 if (mode == VOIDmode)
5005 return const0_rtx;
5007 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5009 aapcs_layout_arg (pcum, mode, type, named);
5010 return pcum->aapcs_reg;
5013 /* Varargs vectors are treated the same as long long.
5014 named_count avoids having to change the way arm handles 'named'. */
5015 if (TARGET_IWMMXT_ABI
5016 && arm_vector_mode_supported_p (mode)
5017 && pcum->named_count > pcum->nargs + 1)
5019 if (pcum->iwmmxt_nregs <= 9)
5020 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5021 else
5023 pcum->can_split = false;
5024 return NULL_RTX;
5028 /* Put doubleword aligned quantities in even register pairs. */
5029 if (pcum->nregs & 1
5030 && ARM_DOUBLEWORD_ALIGN
5031 && arm_needs_doubleword_align (mode, type))
5032 pcum->nregs++;
5034 /* Only allow splitting an arg between regs and memory if all preceding
5035 args were allocated to regs. For args passed by reference we only count
5036 the reference pointer. */
5037 if (pcum->can_split)
5038 nregs = 1;
5039 else
5040 nregs = ARM_NUM_REGS2 (mode, type);
5042 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5043 return NULL_RTX;
5045 return gen_rtx_REG (mode, pcum->nregs);
5048 static unsigned int
5049 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5051 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5052 ? DOUBLEWORD_ALIGNMENT
5053 : PARM_BOUNDARY);
5056 static int
5057 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5058 tree type, bool named)
5060 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5061 int nregs = pcum->nregs;
5063 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5065 aapcs_layout_arg (pcum, mode, type, named);
5066 return pcum->aapcs_partial;
5069 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5070 return 0;
5072 if (NUM_ARG_REGS > nregs
5073 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5074 && pcum->can_split)
5075 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5077 return 0;
5080 /* Update the data in PCUM to advance over an argument
5081 of mode MODE and data type TYPE.
5082 (TYPE is null for libcalls where that information may not be available.) */
5084 static void
5085 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5086 const_tree type, bool named)
5088 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5090 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5092 aapcs_layout_arg (pcum, mode, type, named);
5094 if (pcum->aapcs_cprc_slot >= 0)
5096 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5097 type);
5098 pcum->aapcs_cprc_slot = -1;
5101 /* Generic stuff. */
5102 pcum->aapcs_arg_processed = false;
5103 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5104 pcum->aapcs_reg = NULL_RTX;
5105 pcum->aapcs_partial = 0;
5107 else
5109 pcum->nargs += 1;
5110 if (arm_vector_mode_supported_p (mode)
5111 && pcum->named_count > pcum->nargs
5112 && TARGET_IWMMXT_ABI)
5113 pcum->iwmmxt_nregs += 1;
5114 else
5115 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5119 /* Variable sized types are passed by reference. This is a GCC
5120 extension to the ARM ABI. */
5122 static bool
5123 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5124 enum machine_mode mode ATTRIBUTE_UNUSED,
5125 const_tree type, bool named ATTRIBUTE_UNUSED)
5127 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5130 /* Encode the current state of the #pragma [no_]long_calls. */
5131 typedef enum
5133 OFF, /* No #pragma [no_]long_calls is in effect. */
5134 LONG, /* #pragma long_calls is in effect. */
5135 SHORT /* #pragma no_long_calls is in effect. */
5136 } arm_pragma_enum;
5138 static arm_pragma_enum arm_pragma_long_calls = OFF;
5140 void
5141 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5143 arm_pragma_long_calls = LONG;
5146 void
5147 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5149 arm_pragma_long_calls = SHORT;
5152 void
5153 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5155 arm_pragma_long_calls = OFF;
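/* Typical use of these pragmas in a header (illustrative):

   #pragma long_calls
   void far_away_helper (void);   -- declared with the long_call attribute
   #pragma long_calls_off

   arm_set_default_type_attributes below is what applies the attribute to
   function types declared while the pragma is in effect.  */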
5158 /* Handle an attribute requiring a FUNCTION_DECL;
5159 arguments as in struct attribute_spec.handler. */
5160 static tree
5161 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5162 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5164 if (TREE_CODE (*node) != FUNCTION_DECL)
5166 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5167 name);
5168 *no_add_attrs = true;
5171 return NULL_TREE;
5174 /* Handle an "interrupt" or "isr" attribute;
5175 arguments as in struct attribute_spec.handler. */
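/* The source-level form being handled is, illustratively:

   void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   where the string argument is mapped by arm_isr_value; an unrecognized
   argument causes the attribute to be ignored with a warning below.  */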
5176 static tree
5177 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5178 bool *no_add_attrs)
5180 if (DECL_P (*node))
5182 if (TREE_CODE (*node) != FUNCTION_DECL)
5184 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5185 name);
5186 *no_add_attrs = true;
5188 /* FIXME: the argument if any is checked for type attributes;
5189 should it be checked for decl ones? */
5191 else
5193 if (TREE_CODE (*node) == FUNCTION_TYPE
5194 || TREE_CODE (*node) == METHOD_TYPE)
5196 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5198 warning (OPT_Wattributes, "%qE attribute ignored",
5199 name);
5200 *no_add_attrs = true;
5203 else if (TREE_CODE (*node) == POINTER_TYPE
5204 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5205 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5206 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5208 *node = build_variant_type_copy (*node);
5209 TREE_TYPE (*node) = build_type_attribute_variant
5210 (TREE_TYPE (*node),
5211 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5212 *no_add_attrs = true;
5214 else
5216 /* Possibly pass this attribute on from the type to a decl. */
5217 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5218 | (int) ATTR_FLAG_FUNCTION_NEXT
5219 | (int) ATTR_FLAG_ARRAY_NEXT))
5221 *no_add_attrs = true;
5222 return tree_cons (name, args, NULL_TREE);
5224 else
5226 warning (OPT_Wattributes, "%qE attribute ignored",
5227 name);
5232 return NULL_TREE;
5235 /* Handle a "pcs" attribute; arguments as in struct
5236 attribute_spec.handler. */
5237 static tree
5238 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5239 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5241 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5243 warning (OPT_Wattributes, "%qE attribute ignored", name);
5244 *no_add_attrs = true;
5246 return NULL_TREE;
5249 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5250 /* Handle the "notshared" attribute. This attribute is another way of
5251 requesting hidden visibility. ARM's compiler supports
5252 "__declspec(notshared)"; we support the same thing via an
5253 attribute. */
5255 static tree
5256 arm_handle_notshared_attribute (tree *node,
5257 tree name ATTRIBUTE_UNUSED,
5258 tree args ATTRIBUTE_UNUSED,
5259 int flags ATTRIBUTE_UNUSED,
5260 bool *no_add_attrs)
5262 tree decl = TYPE_NAME (*node);
5264 if (decl)
5266 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5267 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5268 *no_add_attrs = false;
5270 return NULL_TREE;
5272 #endif
5274 /* Return 0 if the attributes for two types are incompatible, 1 if they
5275 are compatible, and 2 if they are nearly compatible (which causes a
5276 warning to be generated). */
5277 static int
5278 arm_comp_type_attributes (const_tree type1, const_tree type2)
5280 int l1, l2, s1, s2;
5282 /* Check for mismatch of non-default calling convention. */
5283 if (TREE_CODE (type1) != FUNCTION_TYPE)
5284 return 1;
5286 /* Check for mismatched call attributes. */
5287 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5288 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5289 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5290 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5292 /* Only bother to check if an attribute is defined. */
5293 if (l1 | l2 | s1 | s2)
5295 /* If one type has an attribute, the other must have the same attribute. */
5296 if ((l1 != l2) || (s1 != s2))
5297 return 0;
5299 /* Disallow mixed attributes. */
5300 if ((l1 & s2) || (l2 & s1))
5301 return 0;
5304 /* Check for mismatched ISR attribute. */
5305 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5306 if (! l1)
5307 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5308 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5309 if (! l2)
5310 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5311 if (l1 != l2)
5312 return 0;
5314 return 1;
5317 /* Assigns default attributes to newly defined type. This is used to
5318 set short_call/long_call attributes for function types of
5319 functions defined inside corresponding #pragma scopes. */
5320 static void
5321 arm_set_default_type_attributes (tree type)
5323 /* Add __attribute__ ((long_call)) to all functions, when
5324 inside #pragma long_calls or __attribute__ ((short_call)),
5325 when inside #pragma no_long_calls. */
5326 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5328 tree type_attr_list, attr_name;
5329 type_attr_list = TYPE_ATTRIBUTES (type);
5331 if (arm_pragma_long_calls == LONG)
5332 attr_name = get_identifier ("long_call");
5333 else if (arm_pragma_long_calls == SHORT)
5334 attr_name = get_identifier ("short_call");
5335 else
5336 return;
5338 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5339 TYPE_ATTRIBUTES (type) = type_attr_list;
5343 /* Return true if DECL is known to be linked into section SECTION. */
5345 static bool
5346 arm_function_in_section_p (tree decl, section *section)
5348 /* We can only be certain about functions defined in the same
5349 compilation unit. */
5350 if (!TREE_STATIC (decl))
5351 return false;
5353 /* Make sure that SYMBOL always binds to the definition in this
5354 compilation unit. */
5355 if (!targetm.binds_local_p (decl))
5356 return false;
5358 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5359 if (!DECL_SECTION_NAME (decl))
5361 /* Make sure that we will not create a unique section for DECL. */
5362 if (flag_function_sections || DECL_ONE_ONLY (decl))
5363 return false;
5366 return function_section (decl) == section;
5369 /* Return nonzero if a 32-bit "long_call" should be generated for
5370 a call from the current function to DECL. We generate a long_call
5371 if the function:
5373 a. has an __attribute__ ((long_call))
5374 or b. is within the scope of a #pragma long_calls
5375 or c. the -mlong-calls command line switch has been specified
5377 However we do not generate a long call if the function:
5379 d. has an __attribute__ ((short_call))
5380 or e. is inside the scope of a #pragma no_long_calls
5381 or f. is defined in the same section as the current function. */
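/* Illustrative declarations that hit cases (a) and (d) respectively:

   void far_func (void) __attribute__ ((long_call));
   void near_func (void) __attribute__ ((short_call));

   With -mlong-calls on the command line, near_func is still a short call
   because the attribute check below comes before TARGET_LONG_CALLS.  */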
5383 bool
5384 arm_is_long_call_p (tree decl)
5386 tree attrs;
5388 if (!decl)
5389 return TARGET_LONG_CALLS;
5391 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5392 if (lookup_attribute ("short_call", attrs))
5393 return false;
5395 /* For "f", be conservative, and only cater for cases in which the
5396 whole of the current function is placed in the same section. */
5397 if (!flag_reorder_blocks_and_partition
5398 && TREE_CODE (decl) == FUNCTION_DECL
5399 && arm_function_in_section_p (decl, current_function_section ()))
5400 return false;
5402 if (lookup_attribute ("long_call", attrs))
5403 return true;
5405 return TARGET_LONG_CALLS;
5408 /* Return nonzero if it is ok to make a tail-call to DECL. */
5409 static bool
5410 arm_function_ok_for_sibcall (tree decl, tree exp)
5412 unsigned long func_type;
5414 if (cfun->machine->sibcall_blocked)
5415 return false;
5417 /* Never tailcall something if we are generating code for Thumb-1. */
5418 if (TARGET_THUMB1)
5419 return false;
5421 /* The PIC register is live on entry to VxWorks PLT entries, so we
5422 must make the call before restoring the PIC register. */
5423 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5424 return false;
5426 /* Cannot tail-call to long calls, since these are out of range of
5427 a branch instruction. */
5428 if (decl && arm_is_long_call_p (decl))
5429 return false;
5431 /* If we are interworking and the function is not declared static
5432 then we can't tail-call it unless we know that it exists in this
5433 compilation unit (since it might be a Thumb routine). */
5434 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
5435 && !TREE_ASM_WRITTEN (decl))
5436 return false;
5438 func_type = arm_current_func_type ();
5439 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5440 if (IS_INTERRUPT (func_type))
5441 return false;
5443 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5445 /* Check that the return value locations are the same. For
5446 example that we aren't returning a value from the sibling in
5447 a VFP register but then need to transfer it to a core
5448 register. */
5449 rtx a, b;
5451 a = arm_function_value (TREE_TYPE (exp), decl, false);
5452 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5453 cfun->decl, false);
5454 if (!rtx_equal_p (a, b))
5455 return false;
5458 /* Never tailcall if function may be called with a misaligned SP. */
5459 if (IS_STACKALIGN (func_type))
5460 return false;
5462 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5463 references should become a NOP. Don't convert such calls into
5464 sibling calls. */
5465 if (TARGET_AAPCS_BASED
5466 && arm_abi == ARM_ABI_AAPCS
5467 && decl
5468 && DECL_WEAK (decl))
5469 return false;
5471 /* Everything else is ok. */
5472 return true;
5476 /* Addressing mode support functions. */
5478 /* Return nonzero if X is a legitimate immediate operand when compiling
5479 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5480 int
5481 legitimate_pic_operand_p (rtx x)
5483 if (GET_CODE (x) == SYMBOL_REF
5484 || (GET_CODE (x) == CONST
5485 && GET_CODE (XEXP (x, 0)) == PLUS
5486 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5487 return 0;
5489 return 1;
5492 /* Record that the current function needs a PIC register. Initialize
5493 cfun->machine->pic_reg if we have not already done so. */
5495 static void
5496 require_pic_register (void)
5498 /* A lot of the logic here is made obscure by the fact that this
5499 routine gets called as part of the rtx cost estimation process.
5500 We don't want those calls to affect any assumptions about the real
5501 function; and further, we can't call entry_of_function() until we
5502 start the real expansion process. */
5503 if (!crtl->uses_pic_offset_table)
5505 gcc_assert (can_create_pseudo_p ());
5506 if (arm_pic_register != INVALID_REGNUM
5507 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
5509 if (!cfun->machine->pic_reg)
5510 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5512 /* Play games to avoid marking the function as needing pic
5513 if we are being called as part of the cost-estimation
5514 process. */
5515 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5516 crtl->uses_pic_offset_table = 1;
5518 else
5520 rtx seq, insn;
5522 if (!cfun->machine->pic_reg)
5523 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5525 /* Play games to avoid marking the function as needing pic
5526 if we are being called as part of the cost-estimation
5527 process. */
5528 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5530 crtl->uses_pic_offset_table = 1;
5531 start_sequence ();
5533 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
5534 && arm_pic_register > LAST_LO_REGNUM)
5535 emit_move_insn (cfun->machine->pic_reg,
5536 gen_rtx_REG (Pmode, arm_pic_register));
5537 else
5538 arm_load_pic_register (0UL);
5540 seq = get_insns ();
5541 end_sequence ();
5543 for (insn = seq; insn; insn = NEXT_INSN (insn))
5544 if (INSN_P (insn))
5545 INSN_LOCATION (insn) = prologue_location;
5547 /* We can be called during expansion of PHI nodes, where
5548 we can't yet emit instructions directly in the final
5549 insn stream. Queue the insns on the entry edge, they will
5550 be committed after everything else is expanded. */
5551 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5557 rtx
5558 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5560 if (GET_CODE (orig) == SYMBOL_REF
5561 || GET_CODE (orig) == LABEL_REF)
5563 rtx insn;
5565 if (reg == 0)
5567 gcc_assert (can_create_pseudo_p ());
5568 reg = gen_reg_rtx (Pmode);
5571 /* VxWorks does not impose a fixed gap between segments; the run-time
5572 gap can be different from the object-file gap. We therefore can't
5573 use GOTOFF unless we are absolutely sure that the symbol is in the
5574 same segment as the GOT. Unfortunately, the flexibility of linker
5575 scripts means that we can't be sure of that in general, so assume
5576 that GOTOFF is never valid on VxWorks. */
5577 if ((GET_CODE (orig) == LABEL_REF
5578 || (GET_CODE (orig) == SYMBOL_REF &&
5579 SYMBOL_REF_LOCAL_P (orig)))
5580 && NEED_GOT_RELOC
5581 && !TARGET_VXWORKS_RTP)
5582 insn = arm_pic_static_addr (orig, reg);
5583 else
5585 rtx pat;
5586 rtx mem;
5588 /* If this function doesn't have a pic register, create one now. */
5589 require_pic_register ();
5591 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5593 /* Make the MEM as close to a constant as possible. */
5594 mem = SET_SRC (pat);
5595 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5596 MEM_READONLY_P (mem) = 1;
5597 MEM_NOTRAP_P (mem) = 1;
5599 insn = emit_insn (pat);
5602 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5603 by loop. */
5604 set_unique_reg_note (insn, REG_EQUAL, orig);
5606 return reg;
5608 else if (GET_CODE (orig) == CONST)
5610 rtx base, offset;
5612 if (GET_CODE (XEXP (orig, 0)) == PLUS
5613 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5614 return orig;
5616 /* Handle the case where we have: const (UNSPEC_TLS). */
5617 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5618 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5619 return orig;
5621 /* Handle the case where we have:
5622 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5623 CONST_INT. */
5624 if (GET_CODE (XEXP (orig, 0)) == PLUS
5625 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5626 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5628 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
5629 return orig;
5632 if (reg == 0)
5634 gcc_assert (can_create_pseudo_p ());
5635 reg = gen_reg_rtx (Pmode);
5638 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5640 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5641 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5642 base == reg ? 0 : reg);
5644 if (CONST_INT_P (offset))
5646 /* The base register doesn't really matter, we only want to
5647 test the index for the appropriate mode. */
5648 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5650 gcc_assert (can_create_pseudo_p ());
5651 offset = force_reg (Pmode, offset);
5654 if (CONST_INT_P (offset))
5655 return plus_constant (Pmode, base, INTVAL (offset));
5658 if (GET_MODE_SIZE (mode) > 4
5659 && (GET_MODE_CLASS (mode) == MODE_INT
5660 || TARGET_SOFT_FLOAT))
5662 emit_insn (gen_addsi3 (reg, base, offset));
5663 return reg;
5666 return gen_rtx_PLUS (Pmode, base, offset);
5669 return orig;
5673 /* Find a spare register to use during the prolog of a function. */
5675 static int
5676 thumb_find_work_register (unsigned long pushed_regs_mask)
5678 int reg;
5680 /* Check the argument registers first as these are call-used. The
5681 register allocation order means that sometimes r3 might be used
5682 but earlier argument registers might not, so check them all. */
5683 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5684 if (!df_regs_ever_live_p (reg))
5685 return reg;
5687 /* Before going on to check the call-saved registers we can try a couple
5688 more ways of deducing that r3 is available. The first is when we are
5689 pushing anonymous arguments onto the stack and we have less than 4
5690 registers worth of fixed arguments(*). In this case r3 will be part of
5691 the variable argument list and so we can be sure that it will be
5692 pushed right at the start of the function. Hence it will be available
5693 for the rest of the prologue.
5694 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5695 if (cfun->machine->uses_anonymous_args
5696 && crtl->args.pretend_args_size > 0)
5697 return LAST_ARG_REGNUM;
5699 /* The other case is when we have fixed arguments but less than 4 registers
5700 worth. In this case r3 might be used in the body of the function, but
5701 it is not being used to convey an argument into the function. In theory
5702 we could just check crtl->args.size to see how many bytes are
5703 being passed in argument registers, but it seems that it is unreliable.
5704 Sometimes it will have the value 0 when in fact arguments are being
5705 passed. (See testcase execute/20021111-1.c for an example). So we also
5706 check the args_info.nregs field as well. The problem with this field is
5707 that it makes no allowances for arguments that are passed to the
5708 function but which are not used. Hence we could miss an opportunity
5709 when a function has an unused argument in r3. But it is better to be
5710 safe than to be sorry. */
5711 if (! cfun->machine->uses_anonymous_args
5712 && crtl->args.size >= 0
5713 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5714 && (TARGET_AAPCS_BASED
5715 ? crtl->args.info.aapcs_ncrn < 4
5716 : crtl->args.info.nregs < 4))
5717 return LAST_ARG_REGNUM;
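/* Hypothetical example: for "void f (int a, int b)" only r0 and r1 carry
   incoming arguments (args_info.nregs / aapcs_ncrn == 2 < 4), so the test
   above lets the prologue use r3 as its scratch register.  */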
5719 /* Otherwise look for a call-saved register that is going to be pushed. */
5720 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5721 if (pushed_regs_mask & (1 << reg))
5722 return reg;
5724 if (TARGET_THUMB2)
5726 /* Thumb-2 can use high regs. */
5727 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5728 if (pushed_regs_mask & (1 << reg))
5729 return reg;
5731 /* Something went wrong - thumb_compute_save_reg_mask()
5732 should have arranged for a suitable register to be pushed. */
5733 gcc_unreachable ();
5736 static GTY(()) int pic_labelno;
5738 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5739 low register. */
5741 void
5742 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5744 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5746 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5747 return;
5749 gcc_assert (flag_pic);
5751 pic_reg = cfun->machine->pic_reg;
5752 if (TARGET_VXWORKS_RTP)
5754 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5755 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5756 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5758 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5760 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5761 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5763 else
5765 /* We use an UNSPEC rather than a LABEL_REF because this label
5766 never appears in the code stream. */
5768 labelno = GEN_INT (pic_labelno++);
5769 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5770 l1 = gen_rtx_CONST (VOIDmode, l1);
5772 /* On the ARM the PC register contains 'dot + 8' at the time of the
5773 addition, on the Thumb it is 'dot + 4'. */
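/* Sketch of the resulting sequence for ARM state (register and label
   names are illustrative only):

       ldr     r4, .LPIC_off
   .LPIC0:
       add     r4, pc, r4        @ pc reads as .LPIC0 + 8 here
       ...
   .LPIC_off:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   so the 'dot + 8' (or 'dot + 4' for Thumb) added below cancels the PC
   read-ahead and leaves the GOT base in the pic register.  */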
5774 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5775 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5776 UNSPEC_GOTSYM_OFF);
5777 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5779 if (TARGET_32BIT)
5781 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5783 else /* TARGET_THUMB1 */
5785 if (arm_pic_register != INVALID_REGNUM
5786 && REGNO (pic_reg) > LAST_LO_REGNUM)
5788 /* We will have pushed the pic register, so we should always be
5789 able to find a work register. */
5790 pic_tmp = gen_rtx_REG (SImode,
5791 thumb_find_work_register (saved_regs));
5792 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5793 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5794 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5796 else if (arm_pic_register != INVALID_REGNUM
5797 && arm_pic_register > LAST_LO_REGNUM
5798 && REGNO (pic_reg) <= LAST_LO_REGNUM)
5800 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5801 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
5802 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
5804 else
5805 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5809 /* Need to emit this whether or not we obey regdecls,
5810 since setjmp/longjmp can cause life info to screw up. */
5811 emit_use (pic_reg);
5814 /* Generate code to load the address of a static var when flag_pic is set. */
5815 static rtx
5816 arm_pic_static_addr (rtx orig, rtx reg)
5818 rtx l1, labelno, offset_rtx, insn;
5820 gcc_assert (flag_pic);
5822 /* We use an UNSPEC rather than a LABEL_REF because this label
5823 never appears in the code stream. */
5824 labelno = GEN_INT (pic_labelno++);
5825 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5826 l1 = gen_rtx_CONST (VOIDmode, l1);
5828 /* On the ARM the PC register contains 'dot + 8' at the time of the
5829 addition, on the Thumb it is 'dot + 4'. */
5830 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5831 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5832 UNSPEC_SYMBOL_OFFSET);
5833 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5835 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5836 return insn;
5839 /* Return nonzero if X is valid as an ARM state addressing register. */
5840 static int
5841 arm_address_register_rtx_p (rtx x, int strict_p)
5843 int regno;
5845 if (!REG_P (x))
5846 return 0;
5848 regno = REGNO (x);
5850 if (strict_p)
5851 return ARM_REGNO_OK_FOR_BASE_P (regno);
5853 return (regno <= LAST_ARM_REGNUM
5854 || regno >= FIRST_PSEUDO_REGISTER
5855 || regno == FRAME_POINTER_REGNUM
5856 || regno == ARG_POINTER_REGNUM);
5859 /* Return TRUE if this rtx is the difference of a symbol and a label,
5860 and will reduce to a PC-relative relocation in the object file.
5861 Expressions like this can be left alone when generating PIC, rather
5862 than forced through the GOT. */
5863 static int
5864 pcrel_constant_p (rtx x)
5866 if (GET_CODE (x) == MINUS)
5867 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5869 return FALSE;
5872 /* Return true if X will surely end up in an index register after next
5873 splitting pass. */
5874 static bool
5875 will_be_in_index_register (const_rtx x)
5877 /* arm.md: calculate_pic_address will split this into a register. */
5878 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5881 /* Return nonzero if X is a valid ARM state address operand. */
5882 int
5883 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5884 int strict_p)
5886 bool use_ldrd;
5887 enum rtx_code code = GET_CODE (x);
5889 if (arm_address_register_rtx_p (x, strict_p))
5890 return 1;
5892 use_ldrd = (TARGET_LDRD
5893 && (mode == DImode
5894 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5896 if (code == POST_INC || code == PRE_DEC
5897 || ((code == PRE_INC || code == POST_DEC)
5898 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5899 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5901 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5902 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5903 && GET_CODE (XEXP (x, 1)) == PLUS
5904 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5906 rtx addend = XEXP (XEXP (x, 1), 1);
5908 /* Don't allow ldrd post increment by register because it's hard
5909 to fixup invalid register choices. */
5910 if (use_ldrd
5911 && GET_CODE (x) == POST_MODIFY
5912 && REG_P (addend))
5913 return 0;
5915 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5916 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5919 /* After reload constants split into minipools will have addresses
5920 from a LABEL_REF. */
5921 else if (reload_completed
5922 && (code == LABEL_REF
5923 || (code == CONST
5924 && GET_CODE (XEXP (x, 0)) == PLUS
5925 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5926 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5927 return 1;
5929 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5930 return 0;
5932 else if (code == PLUS)
5934 rtx xop0 = XEXP (x, 0);
5935 rtx xop1 = XEXP (x, 1);
5937 return ((arm_address_register_rtx_p (xop0, strict_p)
5938 && ((CONST_INT_P (xop1)
5939 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5940 || (!strict_p && will_be_in_index_register (xop1))))
5941 || (arm_address_register_rtx_p (xop1, strict_p)
5942 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5945 #if 0
5946 /* Reload currently can't handle MINUS, so disable this for now */
5947 else if (GET_CODE (x) == MINUS)
5949 rtx xop0 = XEXP (x, 0);
5950 rtx xop1 = XEXP (x, 1);
5952 return (arm_address_register_rtx_p (xop0, strict_p)
5953 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5955 #endif
5957 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5958 && code == SYMBOL_REF
5959 && CONSTANT_POOL_ADDRESS_P (x)
5960 && ! (flag_pic
5961 && symbol_mentioned_p (get_pool_constant (x))
5962 && ! pcrel_constant_p (get_pool_constant (x))))
5963 return 1;
5965 return 0;
5968 /* Return nonzero if X is a valid Thumb-2 address operand. */
5969 static int
5970 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5972 bool use_ldrd;
5973 enum rtx_code code = GET_CODE (x);
5975 if (arm_address_register_rtx_p (x, strict_p))
5976 return 1;
5978 use_ldrd = (TARGET_LDRD
5979 && (mode == DImode
5980 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5982 if (code == POST_INC || code == PRE_DEC
5983 || ((code == PRE_INC || code == POST_DEC)
5984 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5985 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5987 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5988 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5989 && GET_CODE (XEXP (x, 1)) == PLUS
5990 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5992 /* Thumb-2 only has autoincrement by constant. */
5993 rtx addend = XEXP (XEXP (x, 1), 1);
5994 HOST_WIDE_INT offset;
5996 if (!CONST_INT_P (addend))
5997 return 0;
5999 offset = INTVAL(addend);
6000 if (GET_MODE_SIZE (mode) <= 4)
6001 return (offset > -256 && offset < 256);
6003 return (use_ldrd && offset > -1024 && offset < 1024
6004 && (offset & 3) == 0);
6007 /* After reload constants split into minipools will have addresses
6008 from a LABEL_REF. */
6009 else if (reload_completed
6010 && (code == LABEL_REF
6011 || (code == CONST
6012 && GET_CODE (XEXP (x, 0)) == PLUS
6013 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6014 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6015 return 1;
6017 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6018 return 0;
6020 else if (code == PLUS)
6022 rtx xop0 = XEXP (x, 0);
6023 rtx xop1 = XEXP (x, 1);
6025 return ((arm_address_register_rtx_p (xop0, strict_p)
6026 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6027 || (!strict_p && will_be_in_index_register (xop1))))
6028 || (arm_address_register_rtx_p (xop1, strict_p)
6029 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6032 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6033 && code == SYMBOL_REF
6034 && CONSTANT_POOL_ADDRESS_P (x)
6035 && ! (flag_pic
6036 && symbol_mentioned_p (get_pool_constant (x))
6037 && ! pcrel_constant_p (get_pool_constant (x))))
6038 return 1;
6040 return 0;
6043 /* Return nonzero if INDEX is valid for an address index operand in
6044 ARM state. */
6045 static int
6046 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6047 int strict_p)
6049 HOST_WIDE_INT range;
6050 enum rtx_code code = GET_CODE (index);
6052 /* Standard coprocessor addressing modes. */
6053 if (TARGET_HARD_FLOAT
6054 && TARGET_VFP
6055 && (mode == SFmode || mode == DFmode))
6056 return (code == CONST_INT && INTVAL (index) < 1024
6057 && INTVAL (index) > -1024
6058 && (INTVAL (index) & 3) == 0);
6060 /* For quad modes, we restrict the constant offset to be slightly less
6061 than what the instruction format permits. We do this because for
6062 quad mode moves, we will actually decompose them into two separate
6063 double-mode reads or writes. INDEX must therefore be a valid
6064 (double-mode) offset and so should INDEX+8. */
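/* Worked example of the restriction: with the "< 1016" bound below, the
   largest accepted word-aligned offset is 1012, so the second double-mode
   half of the access uses 1012 + 8 = 1020, still inside the (-1024, 1024)
   double-mode window.  An offset of 1016 would need 1024 for its second
   half and is therefore rejected.  */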
6065 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6066 return (code == CONST_INT
6067 && INTVAL (index) < 1016
6068 && INTVAL (index) > -1024
6069 && (INTVAL (index) & 3) == 0);
6071 /* We have no such constraint on double mode offsets, so we permit the
6072 full range of the instruction format. */
6073 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6074 return (code == CONST_INT
6075 && INTVAL (index) < 1024
6076 && INTVAL (index) > -1024
6077 && (INTVAL (index) & 3) == 0);
6079 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6080 return (code == CONST_INT
6081 && INTVAL (index) < 1024
6082 && INTVAL (index) > -1024
6083 && (INTVAL (index) & 3) == 0);
6085 if (arm_address_register_rtx_p (index, strict_p)
6086 && (GET_MODE_SIZE (mode) <= 4))
6087 return 1;
6089 if (mode == DImode || mode == DFmode)
6091 if (code == CONST_INT)
6093 HOST_WIDE_INT val = INTVAL (index);
6095 if (TARGET_LDRD)
6096 return val > -256 && val < 256;
6097 else
6098 return val > -4096 && val < 4092;
6101 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6104 if (GET_MODE_SIZE (mode) <= 4
6105 && ! (arm_arch4
6106 && (mode == HImode
6107 || mode == HFmode
6108 || (mode == QImode && outer == SIGN_EXTEND))))
6110 if (code == MULT)
6112 rtx xiop0 = XEXP (index, 0);
6113 rtx xiop1 = XEXP (index, 1);
6115 return ((arm_address_register_rtx_p (xiop0, strict_p)
6116 && power_of_two_operand (xiop1, SImode))
6117 || (arm_address_register_rtx_p (xiop1, strict_p)
6118 && power_of_two_operand (xiop0, SImode)));
6120 else if (code == LSHIFTRT || code == ASHIFTRT
6121 || code == ASHIFT || code == ROTATERT)
6123 rtx op = XEXP (index, 1);
6125 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6126 && CONST_INT_P (op)
6127 && INTVAL (op) > 0
6128 && INTVAL (op) <= 31);
6132 /* For ARM v4 we may be doing a sign-extend operation during the
6133 load. */
6134 if (arm_arch4)
6136 if (mode == HImode
6137 || mode == HFmode
6138 || (outer == SIGN_EXTEND && mode == QImode))
6139 range = 256;
6140 else
6141 range = 4096;
6143 else
6144 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6146 return (code == CONST_INT
6147 && INTVAL (index) < range
6148 && INTVAL (index) > -range);
6151 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6152 index operand. i.e. 1, 2, 4 or 8. */
6153 static bool
6154 thumb2_index_mul_operand (rtx op)
6156 HOST_WIDE_INT val;
6158 if (!CONST_INT_P (op))
6159 return false;
6161 val = INTVAL(op);
6162 return (val == 1 || val == 2 || val == 4 || val == 8);
6165 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6166 static int
6167 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6169 enum rtx_code code = GET_CODE (index);
6171 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6172 /* Standard coprocessor addressing modes. */
6173 if (TARGET_HARD_FLOAT
6174 && TARGET_VFP
6175 && (mode == SFmode || mode == DFmode))
6176 return (code == CONST_INT && INTVAL (index) < 1024
6177 /* Thumb-2 allows only > -256 index range for its core register
6178 load/stores. Since we allow SF/DF in core registers, we have
6179 to use the intersection between -256~4096 (core) and -1024~1024
6180 (coprocessor). */
6181 && INTVAL (index) > -256
6182 && (INTVAL (index) & 3) == 0);
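/* Illustrative consequence: an SFmode/DFmode offset of +512 satisfies
   these checks, while -512 is rejected even though a pure VFP access
   could encode it, because the core-register fallback described above
   cannot reach below -255.  */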
6184 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6186 /* For DImode assume values will usually live in core regs
6187 and only allow LDRD addressing modes. */
6188 if (!TARGET_LDRD || mode != DImode)
6189 return (code == CONST_INT
6190 && INTVAL (index) < 1024
6191 && INTVAL (index) > -1024
6192 && (INTVAL (index) & 3) == 0);
6195 /* For quad modes, we restrict the constant offset to be slightly less
6196 than what the instruction format permits. We do this because for
6197 quad mode moves, we will actually decompose them into two separate
6198 double-mode reads or writes. INDEX must therefore be a valid
6199 (double-mode) offset and so should INDEX+8. */
6200 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6201 return (code == CONST_INT
6202 && INTVAL (index) < 1016
6203 && INTVAL (index) > -1024
6204 && (INTVAL (index) & 3) == 0);
6206 /* We have no such constraint on double mode offsets, so we permit the
6207 full range of the instruction format. */
6208 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6209 return (code == CONST_INT
6210 && INTVAL (index) < 1024
6211 && INTVAL (index) > -1024
6212 && (INTVAL (index) & 3) == 0);
6214 if (arm_address_register_rtx_p (index, strict_p)
6215 && (GET_MODE_SIZE (mode) <= 4))
6216 return 1;
6218 if (mode == DImode || mode == DFmode)
6220 if (code == CONST_INT)
6222 HOST_WIDE_INT val = INTVAL (index);
6223 /* ??? Can we assume ldrd for thumb2? */
6224 /* Thumb-2 ldrd only has reg+const addressing modes. */
6225 /* ldrd supports offsets of +-1020.
6226 However the ldr fallback does not. */
6227 return val > -256 && val < 256 && (val & 3) == 0;
6229 else
6230 return 0;
6233 if (code == MULT)
6235 rtx xiop0 = XEXP (index, 0);
6236 rtx xiop1 = XEXP (index, 1);
6238 return ((arm_address_register_rtx_p (xiop0, strict_p)
6239 && thumb2_index_mul_operand (xiop1))
6240 || (arm_address_register_rtx_p (xiop1, strict_p)
6241 && thumb2_index_mul_operand (xiop0)));
6243 else if (code == ASHIFT)
6245 rtx op = XEXP (index, 1);
6247 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6248 && CONST_INT_P (op)
6249 && INTVAL (op) > 0
6250 && INTVAL (op) <= 3);
6253 return (code == CONST_INT
6254 && INTVAL (index) < 4096
6255 && INTVAL (index) > -256);
6258 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6259 static int
6260 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6262 int regno;
6264 if (!REG_P (x))
6265 return 0;
6267 regno = REGNO (x);
6269 if (strict_p)
6270 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6272 return (regno <= LAST_LO_REGNUM
6273 || regno > LAST_VIRTUAL_REGISTER
6274 || regno == FRAME_POINTER_REGNUM
6275 || (GET_MODE_SIZE (mode) >= 4
6276 && (regno == STACK_POINTER_REGNUM
6277 || regno >= FIRST_PSEUDO_REGISTER
6278 || x == hard_frame_pointer_rtx
6279 || x == arg_pointer_rtx)));
6282 /* Return nonzero if x is a legitimate index register. This is the case
6283 for any base register that can access a QImode object. */
6284 inline static int
6285 thumb1_index_register_rtx_p (rtx x, int strict_p)
6287 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6290 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6292 The AP may be eliminated to either the SP or the FP, so we use the
6293 least common denominator, e.g. SImode, and offsets from 0 to 64.
6295 ??? Verify whether the above is the right approach.
6297 ??? Also, the FP may be eliminated to the SP, so perhaps that
6298 needs special handling also.
6300 ??? Look at how the mips16 port solves this problem. It probably uses
6301 better ways to solve some of these problems.
6303 Although it is not incorrect, we don't accept QImode and HImode
6304 addresses based on the frame pointer or arg pointer until the
6305 reload pass starts. This is so that eliminating such addresses
6306 into stack based ones won't produce impossible code. */
6307 int
6308 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6310 /* ??? Not clear if this is right. Experiment. */
6311 if (GET_MODE_SIZE (mode) < 4
6312 && !(reload_in_progress || reload_completed)
6313 && (reg_mentioned_p (frame_pointer_rtx, x)
6314 || reg_mentioned_p (arg_pointer_rtx, x)
6315 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6316 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6317 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6318 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6319 return 0;
6321 /* Accept any base register. SP only in SImode or larger. */
6322 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6323 return 1;
6325 /* This is PC relative data before arm_reorg runs. */
6326 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6327 && GET_CODE (x) == SYMBOL_REF
6328 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6329 return 1;
6331 /* This is PC relative data after arm_reorg runs. */
6332 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6333 && reload_completed
6334 && (GET_CODE (x) == LABEL_REF
6335 || (GET_CODE (x) == CONST
6336 && GET_CODE (XEXP (x, 0)) == PLUS
6337 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6338 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6339 return 1;
6341 /* Post-inc indexing only supported for SImode and larger. */
6342 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6343 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6344 return 1;
6346 else if (GET_CODE (x) == PLUS)
6348 /* REG+REG address can be any two index registers. */
6349 /* We disallow FRAME+REG addressing since we know that FRAME
6350 will be replaced with STACK, and SP relative addressing only
6351 permits SP+OFFSET. */
6352 if (GET_MODE_SIZE (mode) <= 4
6353 && XEXP (x, 0) != frame_pointer_rtx
6354 && XEXP (x, 1) != frame_pointer_rtx
6355 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6356 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6357 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6358 return 1;
6360 /* REG+const has 5-7 bit offset for non-SP registers. */
6361 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6362 || XEXP (x, 0) == arg_pointer_rtx)
6363 && CONST_INT_P (XEXP (x, 1))
6364 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6365 return 1;
6367 /* REG+const has 10-bit offset for SP, but only SImode and
6368 larger is supported. */
6369 /* ??? Should probably check for DI/DFmode overflow here
6370 just like GO_IF_LEGITIMATE_OFFSET does. */
6371 else if (REG_P (XEXP (x, 0))
6372 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6373 && GET_MODE_SIZE (mode) >= 4
6374 && CONST_INT_P (XEXP (x, 1))
6375 && INTVAL (XEXP (x, 1)) >= 0
6376 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6377 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6378 return 1;
6380 else if (REG_P (XEXP (x, 0))
6381 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6382 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6383 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6384 && REGNO (XEXP (x, 0))
6385 <= LAST_VIRTUAL_POINTER_REGISTER))
6386 && GET_MODE_SIZE (mode) >= 4
6387 && CONST_INT_P (XEXP (x, 1))
6388 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6389 return 1;
6392 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6393 && GET_MODE_SIZE (mode) == 4
6394 && GET_CODE (x) == SYMBOL_REF
6395 && CONSTANT_POOL_ADDRESS_P (x)
6396 && ! (flag_pic
6397 && symbol_mentioned_p (get_pool_constant (x))
6398 && ! pcrel_constant_p (get_pool_constant (x))))
6399 return 1;
6401 return 0;
6404 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6405 instruction of mode MODE. */
6406 int
6407 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6409 switch (GET_MODE_SIZE (mode))
6411 case 1:
6412 return val >= 0 && val < 32;
6414 case 2:
6415 return val >= 0 && val < 64 && (val & 1) == 0;
6417 default:
6418 return (val >= 0
6419 && (val + GET_MODE_SIZE (mode)) <= 128
6420 && (val & 3) == 0);
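/* Illustrative summary of the ranges accepted above (a 5-bit offset field
   scaled by the access size in the Thumb-1 encodings):
     byte accesses     : 0 .. 31
     halfword accesses : 0 .. 62, multiples of 2
     word accesses     : 0 .. 124, multiples of 4 (larger modes
                         correspondingly less, so the whole access
                         stays below 128).  */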
6424 bool
6425 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6427 if (TARGET_ARM)
6428 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6429 else if (TARGET_THUMB2)
6430 return thumb2_legitimate_address_p (mode, x, strict_p);
6431 else /* if (TARGET_THUMB1) */
6432 return thumb1_legitimate_address_p (mode, x, strict_p);
6435 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6437 Given an rtx X being reloaded into a reg required to be
6438 in class CLASS, return the class of reg to actually use.
6439 In general this is just CLASS, but for the Thumb core registers and
6440 immediate constants we prefer a LO_REGS class or a subset. */
6442 static reg_class_t
6443 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6445 if (TARGET_32BIT)
6446 return rclass;
6447 else
6449 if (rclass == GENERAL_REGS
6450 || rclass == HI_REGS
6451 || rclass == NO_REGS
6452 || rclass == STACK_REG)
6453 return LO_REGS;
6454 else
6455 return rclass;
6459 /* Build the SYMBOL_REF for __tls_get_addr. */
6461 static GTY(()) rtx tls_get_addr_libfunc;
6463 static rtx
6464 get_tls_get_addr (void)
6466 if (!tls_get_addr_libfunc)
6467 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6468 return tls_get_addr_libfunc;
6471 rtx
6472 arm_load_tp (rtx target)
6474 if (!target)
6475 target = gen_reg_rtx (SImode);
6477 if (TARGET_HARD_TP)
6479 /* Can return in any reg. */
6480 emit_insn (gen_load_tp_hard (target));
6482 else
6484 /* Always returned in r0. Immediately copy the result into a pseudo,
6485 otherwise other uses of r0 (e.g. setting up function arguments) may
6486 clobber the value. */
6488 rtx tmp;
6490 emit_insn (gen_load_tp_soft ());
6492 tmp = gen_rtx_REG (SImode, 0);
6493 emit_move_insn (target, tmp);
6495 return target;
6498 static rtx
6499 load_tls_operand (rtx x, rtx reg)
6501 rtx tmp;
6503 if (reg == NULL_RTX)
6504 reg = gen_reg_rtx (SImode);
6506 tmp = gen_rtx_CONST (SImode, x);
6508 emit_move_insn (reg, tmp);
6510 return reg;
6513 static rtx
6514 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6516 rtx insns, label, labelno, sum;
6518 gcc_assert (reloc != TLS_DESCSEQ);
6519 start_sequence ();
6521 labelno = GEN_INT (pic_labelno++);
6522 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6523 label = gen_rtx_CONST (VOIDmode, label);
6525 sum = gen_rtx_UNSPEC (Pmode,
6526 gen_rtvec (4, x, GEN_INT (reloc), label,
6527 GEN_INT (TARGET_ARM ? 8 : 4)),
6528 UNSPEC_TLS);
6529 reg = load_tls_operand (sum, reg);
6531 if (TARGET_ARM)
6532 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6533 else
6534 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6536 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6537 LCT_PURE, /* LCT_CONST? */
6538 Pmode, 1, reg, Pmode);
6540 insns = get_insns ();
6541 end_sequence ();
6543 return insns;
6546 static rtx
6547 arm_tls_descseq_addr (rtx x, rtx reg)
6549 rtx labelno = GEN_INT (pic_labelno++);
6550 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6551 rtx sum = gen_rtx_UNSPEC (Pmode,
6552 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6553 gen_rtx_CONST (VOIDmode, label),
6554 GEN_INT (!TARGET_ARM)),
6555 UNSPEC_TLS);
6556 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6558 emit_insn (gen_tlscall (x, labelno));
6559 if (!reg)
6560 reg = gen_reg_rtx (SImode);
6561 else
6562 gcc_assert (REGNO (reg) != 0);
6564 emit_move_insn (reg, reg0);
6566 return reg;
6569 rtx
6570 legitimize_tls_address (rtx x, rtx reg)
6572 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6573 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6575 switch (model)
6577 case TLS_MODEL_GLOBAL_DYNAMIC:
6578 if (TARGET_GNU2_TLS)
6580 reg = arm_tls_descseq_addr (x, reg);
6582 tp = arm_load_tp (NULL_RTX);
6584 dest = gen_rtx_PLUS (Pmode, tp, reg);
6586 else
6588 /* Original scheme */
6589 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6590 dest = gen_reg_rtx (Pmode);
6591 emit_libcall_block (insns, dest, ret, x);
6593 return dest;
6595 case TLS_MODEL_LOCAL_DYNAMIC:
6596 if (TARGET_GNU2_TLS)
6598 reg = arm_tls_descseq_addr (x, reg);
6600 tp = arm_load_tp (NULL_RTX);
6602 dest = gen_rtx_PLUS (Pmode, tp, reg);
6604 else
6606 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6608 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6609 share the LDM result with other LD model accesses. */
6610 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6611 UNSPEC_TLS);
6612 dest = gen_reg_rtx (Pmode);
6613 emit_libcall_block (insns, dest, ret, eqv);
6615 /* Load the addend. */
6616 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6617 GEN_INT (TLS_LDO32)),
6618 UNSPEC_TLS);
6619 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6620 dest = gen_rtx_PLUS (Pmode, dest, addend);
6622 return dest;
6624 case TLS_MODEL_INITIAL_EXEC:
6625 labelno = GEN_INT (pic_labelno++);
6626 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6627 label = gen_rtx_CONST (VOIDmode, label);
6628 sum = gen_rtx_UNSPEC (Pmode,
6629 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6630 GEN_INT (TARGET_ARM ? 8 : 4)),
6631 UNSPEC_TLS);
6632 reg = load_tls_operand (sum, reg);
6634 if (TARGET_ARM)
6635 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6636 else if (TARGET_THUMB2)
6637 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6638 else
6640 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6641 emit_move_insn (reg, gen_const_mem (SImode, reg));
6644 tp = arm_load_tp (NULL_RTX);
6646 return gen_rtx_PLUS (Pmode, tp, reg);
6648 case TLS_MODEL_LOCAL_EXEC:
6649 tp = arm_load_tp (NULL_RTX);
6651 reg = gen_rtx_UNSPEC (Pmode,
6652 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6653 UNSPEC_TLS);
6654 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6656 return gen_rtx_PLUS (Pmode, tp, reg);
6658 default:
6659 abort ();
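/* Illustrative summary of the cases above (relocation operands as passed
   in the UNSPEC_TLS vectors; "gnu2" refers to -mtls-dialect=gnu2):
     global-dynamic / local-dynamic with gnu2 -> TLS descriptor sequence
     global-dynamic -> TLS_GD32 + __tls_get_addr call
     local-dynamic  -> TLS_LDM32 base + TLS_LDO32 addend
     initial-exec   -> TLS_IE32 GOT-indirect load + thread pointer
     local-exec     -> TLS_LE32 offset + thread pointer  */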
6663 /* Try machine-dependent ways of modifying an illegitimate address
6664 to be legitimate. If we find one, return the new, valid address. */
6665 rtx
6666 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6668 if (!TARGET_ARM)
6670 /* TODO: legitimize_address for Thumb2. */
6671 if (TARGET_THUMB2)
6672 return x;
6673 return thumb_legitimize_address (x, orig_x, mode);
6676 if (arm_tls_symbol_p (x))
6677 return legitimize_tls_address (x, NULL_RTX);
6679 if (GET_CODE (x) == PLUS)
6681 rtx xop0 = XEXP (x, 0);
6682 rtx xop1 = XEXP (x, 1);
6684 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6685 xop0 = force_reg (SImode, xop0);
6687 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6688 xop1 = force_reg (SImode, xop1);
6690 if (ARM_BASE_REGISTER_RTX_P (xop0)
6691 && CONST_INT_P (xop1))
6693 HOST_WIDE_INT n, low_n;
6694 rtx base_reg, val;
6695 n = INTVAL (xop1);
6697 /* VFP addressing modes actually allow greater offsets, but for
6698 now we just stick with the lowest common denominator. */
6699 if (mode == DImode
6700 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6702 low_n = n & 0x0f;
6703 n &= ~0x0f;
6704 if (low_n > 4)
6706 n += 16;
6707 low_n -= 16;
6710 else
6712 low_n = ((mode) == TImode ? 0
6713 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6714 n -= low_n;
6717 base_reg = gen_reg_rtx (SImode);
6718 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6719 emit_move_insn (base_reg, val);
6720 x = plus_constant (Pmode, base_reg, low_n);
6722 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6723 x = gen_rtx_PLUS (SImode, xop0, xop1);
6726 /* XXX We don't allow MINUS any more -- see comment in
6727 arm_legitimate_address_outer_p (). */
6728 else if (GET_CODE (x) == MINUS)
6730 rtx xop0 = XEXP (x, 0);
6731 rtx xop1 = XEXP (x, 1);
6733 if (CONSTANT_P (xop0))
6734 xop0 = force_reg (SImode, xop0);
6736 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6737 xop1 = force_reg (SImode, xop1);
6739 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6740 x = gen_rtx_MINUS (SImode, xop0, xop1);
6743 /* Make sure to take full advantage of the pre-indexed addressing mode
6744 with absolute addresses which often allows for the base register to
6745 be factorized for multiple adjacent memory references, and it might
6746 even allow for the mini pool to be avoided entirely. */
6747 else if (CONST_INT_P (x) && optimize > 0)
6749 unsigned int bits;
6750 HOST_WIDE_INT mask, base, index;
6751 rtx base_reg;
6753 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6754 use an 8-bit index. So let's use a 12-bit index for SImode only and
6755 hope that arm_gen_constant will enable ldrb to use more bits. */
6756 bits = (mode == SImode) ? 12 : 8;
6757 mask = (1 << bits) - 1;
6758 base = INTVAL (x) & ~mask;
6759 index = INTVAL (x) & mask;
6760 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6762 /* It'll most probably be more efficient to generate the base
6763 with more bits set and use a negative index instead. */
6764 base |= mask;
6765 index -= mask;
6767 base_reg = force_reg (SImode, GEN_INT (base));
6768 x = plus_constant (Pmode, base_reg, index);
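/* Hypothetical example of the transformation above for SImode and
   x == 0x00fff004: base = 0x00fff000 has 12 bits set (> 10), so it is
   widened to 0x00ffffff (a single MVN) and the index becomes
   4 - 0xfff = -4091, which still fits the 12-bit ldr offset.  */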
6771 if (flag_pic)
6773 /* We need to find and carefully transform any SYMBOL and LABEL
6774 references; so go back to the original address expression. */
6775 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6777 if (new_x != orig_x)
6778 x = new_x;
6781 return x;
6785 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6786 to be legitimate. If we find one, return the new, valid address. */
6787 rtx
6788 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6790 if (arm_tls_symbol_p (x))
6791 return legitimize_tls_address (x, NULL_RTX);
6793 if (GET_CODE (x) == PLUS
6794 && CONST_INT_P (XEXP (x, 1))
6795 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6796 || INTVAL (XEXP (x, 1)) < 0))
6798 rtx xop0 = XEXP (x, 0);
6799 rtx xop1 = XEXP (x, 1);
6800 HOST_WIDE_INT offset = INTVAL (xop1);
6802 /* Try and fold the offset into a biasing of the base register and
6803 then offsetting that. Don't do this when optimizing for space
6804 since it can cause too many CSEs. */
6805 if (optimize_size && offset >= 0
6806 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6808 HOST_WIDE_INT delta;
6810 if (offset >= 256)
6811 delta = offset - (256 - GET_MODE_SIZE (mode));
6812 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6813 delta = 31 * GET_MODE_SIZE (mode);
6814 else
6815 delta = offset & (~31 * GET_MODE_SIZE (mode));
6817 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6818 NULL_RTX);
6819 x = plus_constant (Pmode, xop0, delta);
6821 else if (offset < 0 && offset > -256)
6822 /* Small negative offsets are best done with a subtract before the
6823 dereference; forcing these into a register normally takes two
6824 instructions. */
6825 x = force_operand (x, NULL_RTX);
6826 else
6828 /* For the remaining cases, force the constant into a register. */
6829 xop1 = force_reg (SImode, xop1);
6830 x = gen_rtx_PLUS (SImode, xop0, xop1);
6833 else if (GET_CODE (x) == PLUS
6834 && s_register_operand (XEXP (x, 1), SImode)
6835 && !s_register_operand (XEXP (x, 0), SImode))
6837 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6839 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6842 if (flag_pic)
6844 /* We need to find and carefully transform any SYMBOL and LABEL
6845 references; so go back to the original address expression. */
6846 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6848 if (new_x != orig_x)
6849 x = new_x;
6852 return x;
6855 bool
6856 arm_legitimize_reload_address (rtx *p,
6857 enum machine_mode mode,
6858 int opnum, int type,
6859 int ind_levels ATTRIBUTE_UNUSED)
6861 /* We must recognize output that we have already generated ourselves. */
6862 if (GET_CODE (*p) == PLUS
6863 && GET_CODE (XEXP (*p, 0)) == PLUS
6864 && REG_P (XEXP (XEXP (*p, 0), 0))
6865 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
6866 && CONST_INT_P (XEXP (*p, 1)))
6868 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6869 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6870 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6871 return true;
6874 if (GET_CODE (*p) == PLUS
6875 && REG_P (XEXP (*p, 0))
6876 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6877 /* If the base register is equivalent to a constant, let the generic
6878 code handle it. Otherwise we will run into problems if a future
6879 reload pass decides to rematerialize the constant. */
6880 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6881 && CONST_INT_P (XEXP (*p, 1)))
6883 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6884 HOST_WIDE_INT low, high;
6886 /* Detect coprocessor load/stores. */
6887 bool coproc_p = ((TARGET_HARD_FLOAT
6888 && TARGET_VFP
6889 && (mode == SFmode || mode == DFmode))
6890 || (TARGET_REALLY_IWMMXT
6891 && VALID_IWMMXT_REG_MODE (mode))
6892 || (TARGET_NEON
6893 && (VALID_NEON_DREG_MODE (mode)
6894 || VALID_NEON_QREG_MODE (mode))));
6896 /* For some conditions, bail out when lower two bits are unaligned. */
6897 if ((val & 0x3) != 0
6898 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6899 && (coproc_p
6900 /* For DI, and DF under soft-float: */
6901 || ((mode == DImode || mode == DFmode)
6902 /* Without ldrd, we use stm/ldm, which does not
6903 fare well with unaligned bits. */
6904 && (! TARGET_LDRD
6905 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6906 || TARGET_THUMB2))))
6907 return false;
6909 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6910 of which the (reg+high) gets turned into a reload add insn,
6911 we try to decompose the index into high/low values that can often
6912 also lead to better reload CSE.
6913 For example:
6914 ldr r0, [r2, #4100] // Offset too large
6915 ldr r1, [r2, #4104] // Offset too large
6917 is best reloaded as:
6918 add t1, r2, #4096
6919 ldr r0, [t1, #4]
6920 add t2, r2, #4096
6921 ldr r1, [t2, #8]
6923 which post-reload CSE can simplify in most cases to eliminate the
6924 second add instruction:
6925 add t1, r2, #4096
6926 ldr r0, [t1, #4]
6927 ldr r1, [t1, #8]
6929 The idea here is that we want to split out the bits of the constant
6930 as a mask, rather than as subtracting the maximum offset that the
6931 respective type of load/store used can handle.
6933 When encountering negative offsets, we can still utilize it even if
6934 the overall offset is positive; sometimes this may lead to an immediate
6935 that can be constructed with fewer instructions.
6936 For example:
6937 ldr r0, [r2, #0x3FFFFC]
6939 This is best reloaded as:
6940 add t1, r2, #0x400000
6941 ldr r0, [t1, #-4]
6943 The trick for spotting this for a load insn with N bits of offset
6944 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6945 negative offset that is going to make bit N and all the bits below
6946 it become zero in the remainder part.
6948 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6949 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6950 used in most cases of ARM load/store instructions. */
6952 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6953 (((VAL) & ((1 << (N)) - 1)) \
6954 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6955 : 0)
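/* Worked example for the 0x3FFFFC case in the comment above, using the
   12-bit ldr/str offset field:
     SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12) == -4,
   so high becomes 0x3FFFFC - (-4) = 0x400000, giving the
   "add ..., #0x400000 / ldr ..., [..., #-4]" sequence shown earlier.  */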
6957 if (coproc_p)
6959 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6961 /* NEON quad-word load/stores are made of two double-word accesses,
6962 so the valid index range is reduced by 8. Treat as 9-bit range if
6963 we go over it. */
6964 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6965 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6967 else if (GET_MODE_SIZE (mode) == 8)
6969 if (TARGET_LDRD)
6970 low = (TARGET_THUMB2
6971 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6972 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6973 else
6974 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6975 to access doublewords. The supported load/store offsets are
6976 -8, -4, and 4, which we try to produce here. */
6977 low = ((val & 0xf) ^ 0x8) - 0x8;
6979 else if (GET_MODE_SIZE (mode) < 8)
6981 /* NEON element load/stores do not have an offset. */
6982 if (TARGET_NEON_FP16 && mode == HFmode)
6983 return false;
6985 if (TARGET_THUMB2)
6987 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6988 Try the wider 12-bit range first, and re-try if the result
6989 is out of range. */
6990 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6991 if (low < -255)
6992 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6994 else
6996 if (mode == HImode || mode == HFmode)
6998 if (arm_arch4)
6999 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7000 else
7002 /* The storehi/movhi_bytes fallbacks can use only
7003 [-4094,+4094] of the full ldrb/strb index range. */
7004 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7005 if (low == 4095 || low == -4095)
7006 return false;
7009 else
7010 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7013 else
7014 return false;
7016 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7017 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7018 - (unsigned HOST_WIDE_INT) 0x80000000);
7019 /* Check for overflow or zero */
7020 if (low == 0 || high == 0 || (high + low != val))
7021 return false;
7023 /* Reload the high part into a base reg; leave the low part
7024 in the mem. */
7025 *p = gen_rtx_PLUS (GET_MODE (*p),
7026 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7027 GEN_INT (high)),
7028 GEN_INT (low));
7029 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7030 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7031 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7032 return true;
7035 return false;
7038 rtx
7039 thumb_legitimize_reload_address (rtx *x_p,
7040 enum machine_mode mode,
7041 int opnum, int type,
7042 int ind_levels ATTRIBUTE_UNUSED)
7044 rtx x = *x_p;
7046 if (GET_CODE (x) == PLUS
7047 && GET_MODE_SIZE (mode) < 4
7048 && REG_P (XEXP (x, 0))
7049 && XEXP (x, 0) == stack_pointer_rtx
7050 && CONST_INT_P (XEXP (x, 1))
7051 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7053 rtx orig_x = x;
7055 x = copy_rtx (x);
7056 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7057 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7058 return x;
7061 /* If both registers are hi-regs, then it's better to reload the
7062 entire expression rather than each register individually. That
7063 only requires one reload register rather than two. */
7064 if (GET_CODE (x) == PLUS
7065 && REG_P (XEXP (x, 0))
7066 && REG_P (XEXP (x, 1))
7067 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7068 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7070 rtx orig_x = x;
7072 x = copy_rtx (x);
7073 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7074 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7075 return x;
7078 return NULL;
7081 /* Test for various thread-local symbols. */
7083 /* Return TRUE if X is a thread-local symbol. */
7085 static bool
7086 arm_tls_symbol_p (rtx x)
7088 if (! TARGET_HAVE_TLS)
7089 return false;
7091 if (GET_CODE (x) != SYMBOL_REF)
7092 return false;
7094 return SYMBOL_REF_TLS_MODEL (x) != 0;
7097 /* Helper for arm_tls_referenced_p. */
7099 static int
7100 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7102 if (GET_CODE (*x) == SYMBOL_REF)
7103 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7105 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7106 TLS offsets, not real symbol references. */
7107 if (GET_CODE (*x) == UNSPEC
7108 && XINT (*x, 1) == UNSPEC_TLS)
7109 return -1;
7111 return 0;
7114 /* Return TRUE if X contains any TLS symbol references. */
7116 bool
7117 arm_tls_referenced_p (rtx x)
7119 if (! TARGET_HAVE_TLS)
7120 return false;
7122 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7125 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7127 On the ARM, allow any integer (invalid ones are removed later by insn
7128 patterns), nice doubles and symbol_refs which refer to the function's
7129 constant pool XXX.
7131 When generating pic allow anything. */
7133 static bool
7134 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7136 /* At present, we have no support for Neon structure constants, so forbid
7137 them here. It might be possible to handle simple cases like 0 and -1
7138 in future. */
7139 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7140 return false;
7142 return flag_pic || !label_mentioned_p (x);
7145 static bool
7146 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7148 return (CONST_INT_P (x)
7149 || CONST_DOUBLE_P (x)
7150 || CONSTANT_ADDRESS_P (x)
7151 || flag_pic);
7154 static bool
7155 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7157 return (!arm_cannot_force_const_mem (mode, x)
7158 && (TARGET_32BIT
7159 ? arm_legitimate_constant_p_1 (mode, x)
7160 : thumb_legitimate_constant_p (mode, x)));
7163 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7165 static bool
7166 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7168 rtx base, offset;
7170 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7172 split_const (x, &base, &offset);
7173 if (GET_CODE (base) == SYMBOL_REF
7174 && !offset_within_block_p (base, INTVAL (offset)))
7175 return true;
7177 return arm_tls_referenced_p (x);
7180 #define REG_OR_SUBREG_REG(X) \
7181 (REG_P (X) \
7182 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7184 #define REG_OR_SUBREG_RTX(X) \
7185 (REG_P (X) ? (X) : SUBREG_REG (X))
7187 static inline int
7188 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7190 enum machine_mode mode = GET_MODE (x);
7191 int total, words;
7193 switch (code)
7195 case ASHIFT:
7196 case ASHIFTRT:
7197 case LSHIFTRT:
7198 case ROTATERT:
7199 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7201 case PLUS:
7202 case MINUS:
7203 case COMPARE:
7204 case NEG:
7205 case NOT:
7206 return COSTS_N_INSNS (1);
7208 case MULT:
7209 if (CONST_INT_P (XEXP (x, 1)))
7211 int cycles = 0;
7212 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7214 while (i)
7216 i >>= 2;
7217 cycles++;
7219 return COSTS_N_INSNS (2) + cycles;
7221 return COSTS_N_INSNS (1) + 16;
7223 case SET:
7224 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7225 the mode. */
7226 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7227 return (COSTS_N_INSNS (words)
7228 + 4 * ((MEM_P (SET_SRC (x)))
7229 + MEM_P (SET_DEST (x))));
7231 case CONST_INT:
7232 if (outer == SET)
7234 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7235 return 0;
7236 if (thumb_shiftable_const (INTVAL (x)))
7237 return COSTS_N_INSNS (2);
7238 return COSTS_N_INSNS (3);
7240 else if ((outer == PLUS || outer == COMPARE)
7241 && INTVAL (x) < 256 && INTVAL (x) > -256)
7242 return 0;
7243 else if ((outer == IOR || outer == XOR || outer == AND)
7244 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7245 return COSTS_N_INSNS (1);
7246 else if (outer == AND)
7248 int i;
7249 /* This duplicates the tests in the andsi3 expander. */
7250 for (i = 9; i <= 31; i++)
7251 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7252 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7253 return COSTS_N_INSNS (2);
7255 else if (outer == ASHIFT || outer == ASHIFTRT
7256 || outer == LSHIFTRT)
7257 return 0;
7258 return COSTS_N_INSNS (2);
7260 case CONST:
7261 case CONST_DOUBLE:
7262 case LABEL_REF:
7263 case SYMBOL_REF:
7264 return COSTS_N_INSNS (3);
7266 case UDIV:
7267 case UMOD:
7268 case DIV:
7269 case MOD:
7270 return 100;
7272 case TRUNCATE:
7273 return 99;
7275 case AND:
7276 case XOR:
7277 case IOR:
7278 /* XXX guess. */
7279 return 8;
7281 case MEM:
7282 /* XXX another guess. */
7283 /* Memory costs quite a lot for the first word, but subsequent words
7284 load at the equivalent of a single insn each. */
7285 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7286 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7287 ? 4 : 0));
7289 case IF_THEN_ELSE:
7290 /* XXX a guess. */
7291 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7292 return 14;
7293 return 2;
7295 case SIGN_EXTEND:
7296 case ZERO_EXTEND:
7297 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7298 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7300 if (mode == SImode)
7301 return total;
7303 if (arm_arch6)
7304 return total + COSTS_N_INSNS (1);
7306 /* Assume a two-shift sequence. Increase the cost slightly so
7307 we prefer actual shifts over an extend operation. */
7308 return total + 1 + COSTS_N_INSNS (2);
7310 default:
7311 return 99;
7315 static inline bool
7316 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7318 enum machine_mode mode = GET_MODE (x);
7319 enum rtx_code subcode;
7320 rtx operand;
7321 enum rtx_code code = GET_CODE (x);
7322 *total = 0;
7324 switch (code)
7326 case MEM:
7327 /* Memory costs quite a lot for the first word, but subsequent words
7328 load at the equivalent of a single insn each. */
7329 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7330 return true;
7332 case DIV:
7333 case MOD:
7334 case UDIV:
7335 case UMOD:
7336 if (TARGET_HARD_FLOAT && mode == SFmode)
7337 *total = COSTS_N_INSNS (2);
7338 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7339 *total = COSTS_N_INSNS (4);
7340 else
7341 *total = COSTS_N_INSNS (20);
7342 return false;
7344 case ROTATE:
7345 if (REG_P (XEXP (x, 1)))
7346 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7347 else if (!CONST_INT_P (XEXP (x, 1)))
7348 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7350 /* Fall through */
7351 case ROTATERT:
7352 if (mode != SImode)
7354 *total += COSTS_N_INSNS (4);
7355 return true;
7358 /* Fall through */
7359 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7360 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7361 if (mode == DImode)
7363 *total += COSTS_N_INSNS (3);
7364 return true;
7367 *total += COSTS_N_INSNS (1);
7368 /* Increase the cost of complex shifts because they aren't any faster,
7369 and reduce dual issue opportunities. */
7370 if (arm_tune_cortex_a9
7371 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
7372 ++*total;
7374 return true;
7376 case MINUS:
7377 if (mode == DImode)
7379 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7380 if (CONST_INT_P (XEXP (x, 0))
7381 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7383 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7384 return true;
7387 if (CONST_INT_P (XEXP (x, 1))
7388 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7390 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7391 return true;
7394 return false;
7397 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7399 if (TARGET_HARD_FLOAT
7400 && (mode == SFmode
7401 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7403 *total = COSTS_N_INSNS (1);
7404 if (CONST_DOUBLE_P (XEXP (x, 0))
7405 && arm_const_double_rtx (XEXP (x, 0)))
7407 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7408 return true;
7411 if (CONST_DOUBLE_P (XEXP (x, 1))
7412 && arm_const_double_rtx (XEXP (x, 1)))
7414 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7415 return true;
7418 return false;
7420 *total = COSTS_N_INSNS (20);
7421 return false;
7424 *total = COSTS_N_INSNS (1);
7425 if (CONST_INT_P (XEXP (x, 0))
7426 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7428 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7429 return true;
7432 subcode = GET_CODE (XEXP (x, 1));
7433 if (subcode == ASHIFT || subcode == ASHIFTRT
7434 || subcode == LSHIFTRT
7435 || subcode == ROTATE || subcode == ROTATERT)
7437 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7438 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7439 return true;
7442 /* A shift as a part of RSB costs no more than RSB itself. */
7443 if (GET_CODE (XEXP (x, 0)) == MULT
7444 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7446 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7447 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7448 return true;
7451 if (subcode == MULT
7452 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7454 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7455 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7456 return true;
7459 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7460 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7462 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7463 if (REG_P (XEXP (XEXP (x, 1), 0))
7464 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7465 *total += COSTS_N_INSNS (1);
7467 return true;
7470 /* Fall through */
7472 case PLUS:
7473 if (code == PLUS && arm_arch6 && mode == SImode
7474 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7475 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7477 *total = COSTS_N_INSNS (1);
7478 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7479 0, speed);
7480 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7481 return true;
7484 /* MLA: All arguments must be registers. We filter out
7485 multiplication by a power of two, so that we fall through to
7486 the code below. */
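/* For instance (illustrative): a + b * c can stay a single MLA, whereas
   a + b * 4 is rejected here and handled below as an add with a shifted
   operand (add rd, ra, rb, lsl #2).  */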
7487 if (GET_CODE (XEXP (x, 0)) == MULT
7488 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7490 /* The cost comes from the cost of the multiply. */
7491 return false;
7494 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7496 if (TARGET_HARD_FLOAT
7497 && (mode == SFmode
7498 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7500 *total = COSTS_N_INSNS (1);
7501 if (CONST_DOUBLE_P (XEXP (x, 1))
7502 && arm_const_double_rtx (XEXP (x, 1)))
7504 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7505 return true;
7508 return false;
7511 *total = COSTS_N_INSNS (20);
7512 return false;
7515 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7516 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7518 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7519 if (REG_P (XEXP (XEXP (x, 0), 0))
7520 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7521 *total += COSTS_N_INSNS (1);
7522 return true;
7525 /* Fall through */
7527 case AND: case XOR: case IOR:
7529 /* Normally the frame registers will be spilt into reg+const during
7530 reload, so it is a bad idea to combine them with other instructions,
7531 since then they might not be moved outside of loops. As a compromise
7532 we allow integration with ops that have a constant as their second
7533 operand. */
7534 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7535 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7536 && !CONST_INT_P (XEXP (x, 1)))
7537 *total = COSTS_N_INSNS (1);
7539 if (mode == DImode)
7541 *total += COSTS_N_INSNS (2);
7542 if (CONST_INT_P (XEXP (x, 1))
7543 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7545 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7546 return true;
7549 return false;
7552 *total += COSTS_N_INSNS (1);
7553 if (CONST_INT_P (XEXP (x, 1))
7554 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7556 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7557 return true;
7559 subcode = GET_CODE (XEXP (x, 0));
7560 if (subcode == ASHIFT || subcode == ASHIFTRT
7561 || subcode == LSHIFTRT
7562 || subcode == ROTATE || subcode == ROTATERT)
7564 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7565 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7566 return true;
7569 if (subcode == MULT
7570 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7572 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7573 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7574 return true;
7577 if (subcode == UMIN || subcode == UMAX
7578 || subcode == SMIN || subcode == SMAX)
7580 *total = COSTS_N_INSNS (3);
7581 return true;
7584 return false;
7586 case MULT:
7587 /* This should have been handled by the CPU specific routines. */
7588 gcc_unreachable ();
7590 case TRUNCATE:
7591 if (arm_arch3m && mode == SImode
7592 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7593 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7594 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7595 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7596 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7597 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7599 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7600 return true;
7602 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7603 return false;
7605 case NEG:
7606 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7608 if (TARGET_HARD_FLOAT
7609 && (mode == SFmode
7610 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7612 *total = COSTS_N_INSNS (1);
7613 return false;
7615 *total = COSTS_N_INSNS (2);
7616 return false;
7619 /* Fall through */
7620 case NOT:
7621 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7622 if (mode == SImode && code == NOT)
7624 subcode = GET_CODE (XEXP (x, 0));
7625 if (subcode == ASHIFT || subcode == ASHIFTRT
7626 || subcode == LSHIFTRT
7627 || subcode == ROTATE || subcode == ROTATERT
7628 || (subcode == MULT
7629 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7631 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7632 /* Register shifts cost an extra cycle. */
7633 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
7634 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7635 subcode, 1, speed);
7636 return true;
7640 return false;
7642 case IF_THEN_ELSE:
7643 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7645 *total = COSTS_N_INSNS (4);
7646 return true;
7649 operand = XEXP (x, 0);
7651 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7652 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7653 && REG_P (XEXP (operand, 0))
7654 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7655 *total += COSTS_N_INSNS (1);
7656 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7657 + rtx_cost (XEXP (x, 2), code, 2, speed));
7658 return true;
7660 case NE:
7661 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7663 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7664 return true;
7666 goto scc_insn;
7668 case GE:
7669 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7670 && mode == SImode && XEXP (x, 1) == const0_rtx)
7672 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7673 return true;
7675 goto scc_insn;
7677 case LT:
7678 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7679 && mode == SImode && XEXP (x, 1) == const0_rtx)
7681 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7682 return true;
7684 goto scc_insn;
7686 case EQ:
7687 case GT:
7688 case LE:
7689 case GEU:
7690 case LTU:
7691 case GTU:
7692 case LEU:
7693 case UNORDERED:
7694 case ORDERED:
7695 case UNEQ:
7696 case UNGE:
7697 case UNLT:
7698 case UNGT:
7699 case UNLE:
7700 scc_insn:
7701 /* SCC insns. If the comparison has already been performed, they
7702 cost 2 instructions. Otherwise they need an additional
7703 comparison before them. */
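/* Rough example: with the flags already set, "r0 = (a < b)" is something
   like "mov r0, #0" plus "movlt r0, #1" -- the two insns charged below.  */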
7704 *total = COSTS_N_INSNS (2);
7705 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7707 return true;
7710 /* Fall through */
7711 case COMPARE:
7712 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7714 *total = 0;
7715 return true;
7718 *total += COSTS_N_INSNS (1);
7719 if (CONST_INT_P (XEXP (x, 1))
7720 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7722 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7723 return true;
7726 subcode = GET_CODE (XEXP (x, 0));
7727 if (subcode == ASHIFT || subcode == ASHIFTRT
7728 || subcode == LSHIFTRT
7729 || subcode == ROTATE || subcode == ROTATERT)
7731 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7732 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7733 return true;
7736 if (subcode == MULT
7737 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7739 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7740 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7741 return true;
7744 return false;
7746 case UMIN:
7747 case UMAX:
7748 case SMIN:
7749 case SMAX:
7750 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7751 if (!CONST_INT_P (XEXP (x, 1))
7752 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7753 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7754 return true;
7756 case ABS:
7757 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7759 if (TARGET_HARD_FLOAT
7760 && (mode == SFmode
7761 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7763 *total = COSTS_N_INSNS (1);
7764 return false;
7766 *total = COSTS_N_INSNS (20);
7767 return false;
7769 *total = COSTS_N_INSNS (1);
7770 if (mode == DImode)
7771 *total += COSTS_N_INSNS (3);
7772 return false;
7774 case SIGN_EXTEND:
7775 case ZERO_EXTEND:
7776 *total = 0;
7777 if (GET_MODE_CLASS (mode) == MODE_INT)
7779 rtx op = XEXP (x, 0);
7780 enum machine_mode opmode = GET_MODE (op);
7782 if (mode == DImode)
7783 *total += COSTS_N_INSNS (1);
7785 if (opmode != SImode)
7787 if (MEM_P (op))
7789 /* If !arm_arch4, we use one of the extendhisi2_mem
7790 or movhi_bytes patterns for HImode. For a QImode
7791 sign extension, we first zero-extend from memory
7792 and then perform a shift sequence. */
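/* Sketch: a pre-v4 QImode sign extension from memory is roughly
   "ldrb" then "mov rd, rd, lsl #24" and "mov rd, rd, asr #24",
   hence the two extra insns charged here.  */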
7793 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7794 *total += COSTS_N_INSNS (2);
7796 else if (arm_arch6)
7797 *total += COSTS_N_INSNS (1);
7799 /* We don't have the necessary insn, so we need to perform some
7800 other operation. */
7801 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7802 /* An and with constant 255. */
7803 *total += COSTS_N_INSNS (1);
7804 else
7805 /* A shift sequence. Increase costs slightly to avoid
7806 combining two shifts into an extend operation. */
7807 *total += COSTS_N_INSNS (2) + 1;
7810 return false;
7813 switch (GET_MODE (XEXP (x, 0)))
7815 case V8QImode:
7816 case V4HImode:
7817 case V2SImode:
7818 case V4QImode:
7819 case V2HImode:
7820 *total = COSTS_N_INSNS (1);
7821 return false;
7823 default:
7824 gcc_unreachable ();
7826 gcc_unreachable ();
7828 case ZERO_EXTRACT:
7829 case SIGN_EXTRACT:
7830 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7831 return true;
7833 case CONST_INT:
7834 if (const_ok_for_arm (INTVAL (x))
7835 || const_ok_for_arm (~INTVAL (x)))
7836 *total = COSTS_N_INSNS (1);
7837 else
7838 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7839 INTVAL (x), NULL_RTX,
7840 NULL_RTX, 0, 0));
7841 return true;
7843 case CONST:
7844 case LABEL_REF:
7845 case SYMBOL_REF:
7846 *total = COSTS_N_INSNS (3);
7847 return true;
7849 case HIGH:
7850 *total = COSTS_N_INSNS (1);
7851 return true;
7853 case LO_SUM:
7854 *total = COSTS_N_INSNS (1);
7855 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7856 return true;
7858 case CONST_DOUBLE:
7859 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7860 && (mode == SFmode || !TARGET_VFP_SINGLE))
7861 *total = COSTS_N_INSNS (1);
7862 else
7863 *total = COSTS_N_INSNS (4);
7864 return true;
7866 case SET:
7867 /* The vec_extract patterns accept memory operands that require an
7868 address reload. Account for the cost of that reload to give the
7869 auto-inc-dec pass an incentive to try to replace them. */
7870 if (TARGET_NEON && MEM_P (SET_DEST (x))
7871 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
7873 *total = rtx_cost (SET_DEST (x), code, 0, speed);
7874 if (!neon_vector_mem_operand (SET_DEST (x), 2))
7875 *total += COSTS_N_INSNS (1);
7876 return true;
7878 /* Likewise for the vec_set patterns. */
7879 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
7880 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
7881 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
7883 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
7884 *total = rtx_cost (mem, code, 0, speed);
7885 if (!neon_vector_mem_operand (mem, 2))
7886 *total += COSTS_N_INSNS (1);
7887 return true;
7889 return false;
7891 case UNSPEC:
7892 /* We cost this as high as our memory costs to allow this to
7893 be hoisted from loops. */
7894 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7896 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7898 return true;
7900 case CONST_VECTOR:
7901 if (TARGET_NEON
7902 && TARGET_HARD_FLOAT
7903 && outer == SET
7904 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
7905 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
7906 *total = COSTS_N_INSNS (1);
7907 else
7908 *total = COSTS_N_INSNS (4);
7909 return true;
7911 default:
7912 *total = COSTS_N_INSNS (4);
7913 return false;
7917 /* Estimate the size cost of Thumb-1 instructions.
7918 For now most of the code is copied from thumb1_rtx_costs. We need more
7919 fine-grained tuning when we have more related test cases. */
7920 static inline int
7921 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7923 enum machine_mode mode = GET_MODE (x);
7924 int words;
7926 switch (code)
7928 case ASHIFT:
7929 case ASHIFTRT:
7930 case LSHIFTRT:
7931 case ROTATERT:
7932 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7934 case PLUS:
7935 case MINUS:
7936 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
7937 patterns generated by RTL expansion, especially for the expansion of
7938 multiplication. */
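/* E.g. (illustrative) a + (b << 2) needs "lsls r3, r1, #2" followed by
   "adds r0, r0, r3" on Thumb-1, since ADD has no shifted-operand form
   there.  */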
7939 if ((GET_CODE (XEXP (x, 0)) == MULT
7940 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
7941 || (GET_CODE (XEXP (x, 1)) == MULT
7942 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
7943 return COSTS_N_INSNS (2);
7944 /* Deliberately fall through for normal RTXes. */
7945 case COMPARE:
7946 case NEG:
7947 case NOT:
7948 return COSTS_N_INSNS (1);
7950 case MULT:
7951 if (CONST_INT_P (XEXP (x, 1)))
7953 /* The Thumb-1 mul instruction can't operate on a constant; we must
7954 load it into a register first. */
7955 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7956 return COSTS_N_INSNS (1) + const_size;
7958 return COSTS_N_INSNS (1);
7960 case SET:
7961 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7962 the mode. */
7963 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7964 return (COSTS_N_INSNS (words)
7965 + 4 * ((MEM_P (SET_SRC (x)))
7966 + MEM_P (SET_DEST (x))));
7968 case CONST_INT:
7969 if (outer == SET)
7971 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7972 return COSTS_N_INSNS (1);
7973 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7974 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7975 return COSTS_N_INSNS (2);
7976 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7977 if (thumb_shiftable_const (INTVAL (x)))
7978 return COSTS_N_INSNS (2);
7979 return COSTS_N_INSNS (3);
7981 else if ((outer == PLUS || outer == COMPARE)
7982 && INTVAL (x) < 256 && INTVAL (x) > -256)
7983 return 0;
7984 else if ((outer == IOR || outer == XOR || outer == AND)
7985 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7986 return COSTS_N_INSNS (1);
7987 else if (outer == AND)
7989 int i;
7990 /* This duplicates the tests in the andsi3 expander. */
7991 for (i = 9; i <= 31; i++)
7992 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7993 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7994 return COSTS_N_INSNS (2);
7996 else if (outer == ASHIFT || outer == ASHIFTRT
7997 || outer == LSHIFTRT)
7998 return 0;
7999 return COSTS_N_INSNS (2);
8001 case CONST:
8002 case CONST_DOUBLE:
8003 case LABEL_REF:
8004 case SYMBOL_REF:
8005 return COSTS_N_INSNS (3);
8007 case UDIV:
8008 case UMOD:
8009 case DIV:
8010 case MOD:
8011 return 100;
8013 case TRUNCATE:
8014 return 99;
8016 case AND:
8017 case XOR:
8018 case IOR:
8019 /* XXX guess. */
8020 return 8;
8022 case MEM:
8023 /* XXX another guess. */
8024 /* Memory costs quite a lot for the first word, but subsequent words
8025 load at the equivalent of a single insn each. */
8026 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8027 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8028 ? 4 : 0));
8030 case IF_THEN_ELSE:
8031 /* XXX a guess. */
8032 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8033 return 14;
8034 return 2;
8036 case ZERO_EXTEND:
8037 /* XXX still guessing. */
8038 switch (GET_MODE (XEXP (x, 0)))
8040 case QImode:
8041 return (1 + (mode == DImode ? 4 : 0)
8042 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8044 case HImode:
8045 return (4 + (mode == DImode ? 4 : 0)
8046 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8048 case SImode:
8049 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8051 default:
8052 return 99;
8055 default:
8056 return 99;
8060 /* RTX costs when optimizing for size. */
8061 static bool
8062 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8063 int *total)
8065 enum machine_mode mode = GET_MODE (x);
8066 if (TARGET_THUMB1)
8068 *total = thumb1_size_rtx_costs (x, code, outer_code);
8069 return true;
8072 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8073 switch (code)
8075 case MEM:
8076 /* A memory access costs 1 insn if the mode is small or the address is
8077 a single register; otherwise it costs one insn per word. */
8078 if (REG_P (XEXP (x, 0)))
8079 *total = COSTS_N_INSNS (1);
8080 else if (flag_pic
8081 && GET_CODE (XEXP (x, 0)) == PLUS
8082 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8083 /* This will be split into two instructions.
8084 See arm.md:calculate_pic_address. */
8085 *total = COSTS_N_INSNS (2);
8086 else
8087 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8088 return true;
8090 case DIV:
8091 case MOD:
8092 case UDIV:
8093 case UMOD:
8094 /* Needs a libcall, so it costs about this. */
8095 *total = COSTS_N_INSNS (2);
8096 return false;
8098 case ROTATE:
8099 if (mode == SImode && REG_P (XEXP (x, 1)))
8101 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8102 return true;
8104 /* Fall through */
8105 case ROTATERT:
8106 case ASHIFT:
8107 case LSHIFTRT:
8108 case ASHIFTRT:
8109 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8111 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8112 return true;
8114 else if (mode == SImode)
8116 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8117 /* Slightly disparage register shifts, but not by much. */
8118 if (!CONST_INT_P (XEXP (x, 1)))
8119 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8120 return true;
8123 /* Needs a libcall. */
8124 *total = COSTS_N_INSNS (2);
8125 return false;
8127 case MINUS:
8128 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8129 && (mode == SFmode || !TARGET_VFP_SINGLE))
8131 *total = COSTS_N_INSNS (1);
8132 return false;
8135 if (mode == SImode)
8137 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8138 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8140 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8141 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8142 || subcode1 == ROTATE || subcode1 == ROTATERT
8143 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8144 || subcode1 == ASHIFTRT)
8146 /* It's just the cost of the two operands. */
8147 *total = 0;
8148 return false;
8151 *total = COSTS_N_INSNS (1);
8152 return false;
8155 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8156 return false;
8158 case PLUS:
8159 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8160 && (mode == SFmode || !TARGET_VFP_SINGLE))
8162 *total = COSTS_N_INSNS (1);
8163 return false;
8166 /* A shift as a part of ADD costs nothing. */
8167 if (GET_CODE (XEXP (x, 0)) == MULT
8168 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8170 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8171 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8172 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8173 return true;
8176 /* Fall through */
8177 case AND: case XOR: case IOR:
8178 if (mode == SImode)
8180 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8182 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8183 || subcode == LSHIFTRT || subcode == ASHIFTRT
8184 || (code == AND && subcode == NOT))
8186 /* It's just the cost of the two operands. */
8187 *total = 0;
8188 return false;
8192 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8193 return false;
8195 case MULT:
8196 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8197 return false;
8199 case NEG:
8200 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8201 && (mode == SFmode || !TARGET_VFP_SINGLE))
8203 *total = COSTS_N_INSNS (1);
8204 return false;
8207 /* Fall through */
8208 case NOT:
8209 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8211 return false;
8213 case IF_THEN_ELSE:
8214 *total = 0;
8215 return false;
8217 case COMPARE:
8218 if (cc_register (XEXP (x, 0), VOIDmode))
8219 *total = 0;
8220 else
8221 *total = COSTS_N_INSNS (1);
8222 return false;
8224 case ABS:
8225 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8226 && (mode == SFmode || !TARGET_VFP_SINGLE))
8227 *total = COSTS_N_INSNS (1);
8228 else
8229 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8230 return false;
8232 case SIGN_EXTEND:
8233 case ZERO_EXTEND:
8234 return arm_rtx_costs_1 (x, outer_code, total, 0);
8236 case CONST_INT:
8237 if (const_ok_for_arm (INTVAL (x)))
8238 /* A multiplication by a constant requires another instruction
8239 to load the constant to a register. */
8240 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8241 ? 1 : 0);
8242 else if (const_ok_for_arm (~INTVAL (x)))
8243 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8244 else if (const_ok_for_arm (-INTVAL (x)))
8246 if (outer_code == COMPARE || outer_code == PLUS
8247 || outer_code == MINUS)
8248 *total = 0;
8249 else
8250 *total = COSTS_N_INSNS (1);
8252 else
8253 *total = COSTS_N_INSNS (2);
8254 return true;
8256 case CONST:
8257 case LABEL_REF:
8258 case SYMBOL_REF:
8259 *total = COSTS_N_INSNS (2);
8260 return true;
8262 case CONST_DOUBLE:
8263 *total = COSTS_N_INSNS (4);
8264 return true;
8266 case CONST_VECTOR:
8267 if (TARGET_NEON
8268 && TARGET_HARD_FLOAT
8269 && outer_code == SET
8270 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8271 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8272 *total = COSTS_N_INSNS (1);
8273 else
8274 *total = COSTS_N_INSNS (4);
8275 return true;
8277 case HIGH:
8278 case LO_SUM:
8279 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8280 cost of these slightly. */
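/* Illustrative sketch: HIGH and LO_SUM are the two halves of a
   "movw rd, #:lower16:sym" / "movt rd, #:upper16:sym" pair, while the
   alternative is a single literal-pool load; the extra +1 below biases
   size costing towards the pool entry.  */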
8281 *total = COSTS_N_INSNS (1) + 1;
8282 return true;
8284 case SET:
8285 return false;
8287 default:
8288 if (mode != VOIDmode)
8289 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8290 else
8291 *total = COSTS_N_INSNS (4); /* Who knows? */
8292 return false;
8296 /* Top-level RTX cost hook: use the size costs when not optimizing for speed, otherwise the per-core speed costs. */
8297 static bool
8298 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8299 int *total, bool speed)
8301 if (!speed)
8302 return arm_size_rtx_costs (x, (enum rtx_code) code,
8303 (enum rtx_code) outer_code, total);
8304 else
8305 return current_tune->rtx_costs (x, (enum rtx_code) code,
8306 (enum rtx_code) outer_code,
8307 total, speed);
8310 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8311 supported on any "slowmul" cores, so it can be ignored. */
8313 static bool
8314 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8315 int *total, bool speed)
8317 enum machine_mode mode = GET_MODE (x);
8319 if (TARGET_THUMB)
8321 *total = thumb1_rtx_costs (x, code, outer_code);
8322 return true;
8325 switch (code)
8327 case MULT:
8328 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8329 || mode == DImode)
8331 *total = COSTS_N_INSNS (20);
8332 return false;
8335 if (CONST_INT_P (XEXP (x, 1)))
8337 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8338 & (unsigned HOST_WIDE_INT) 0xffffffff);
8339 int cost, const_ok = const_ok_for_arm (i);
8340 int j, booth_unit_size;
8342 /* Tune as appropriate. */
8343 cost = const_ok ? 4 : 8;
8344 booth_unit_size = 2;
8345 for (j = 0; i && j < 32; j += booth_unit_size)
8347 i >>= booth_unit_size;
8348 cost++;
8351 *total = COSTS_N_INSNS (cost);
8352 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8353 return true;
8356 *total = COSTS_N_INSNS (20);
8357 return false;
8359 default:
8360 return arm_rtx_costs_1 (x, outer_code, total, speed);
8365 /* RTX cost for cores with a fast multiply unit (M variants). */
8367 static bool
8368 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8369 int *total, bool speed)
8371 enum machine_mode mode = GET_MODE (x);
8373 if (TARGET_THUMB1)
8375 *total = thumb1_rtx_costs (x, code, outer_code);
8376 return true;
8379 /* ??? should thumb2 use different costs? */
8380 switch (code)
8382 case MULT:
8383 /* There is no point basing this on the tuning, since it is always the
8384 fast variant if it exists at all. */
8385 if (mode == DImode
8386 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8387 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8388 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8390 *total = COSTS_N_INSNS(2);
8391 return false;
8395 if (mode == DImode)
8397 *total = COSTS_N_INSNS (5);
8398 return false;
8401 if (CONST_INT_P (XEXP (x, 1)))
8403 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8404 & (unsigned HOST_WIDE_INT) 0xffffffff);
8405 int cost, const_ok = const_ok_for_arm (i);
8406 int j, booth_unit_size;
8408 /* Tune as appropriate. */
8409 cost = const_ok ? 4 : 8;
8410 booth_unit_size = 8;
8411 for (j = 0; i && j < 32; j += booth_unit_size)
8413 i >>= booth_unit_size;
8414 cost++;
8417 *total = COSTS_N_INSNS(cost);
8418 return false;
8421 if (mode == SImode)
8423 *total = COSTS_N_INSNS (4);
8424 return false;
8427 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8429 if (TARGET_HARD_FLOAT
8430 && (mode == SFmode
8431 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8433 *total = COSTS_N_INSNS (1);
8434 return false;
8438 /* Requires a lib call */
8439 *total = COSTS_N_INSNS (20);
8440 return false;
8442 default:
8443 return arm_rtx_costs_1 (x, outer_code, total, speed);
8448 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8449 so it can be ignored. */
8451 static bool
8452 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8453 int *total, bool speed)
8455 enum machine_mode mode = GET_MODE (x);
8457 if (TARGET_THUMB)
8459 *total = thumb1_rtx_costs (x, code, outer_code);
8460 return true;
8463 switch (code)
8465 case COMPARE:
8466 if (GET_CODE (XEXP (x, 0)) != MULT)
8467 return arm_rtx_costs_1 (x, outer_code, total, speed);
8469 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8470 will stall until the multiplication is complete. */
8471 *total = COSTS_N_INSNS (3);
8472 return false;
8474 case MULT:
8475 /* There is no point basing this on the tuning, since it is always the
8476 fast variant if it exists at all. */
8477 if (mode == DImode
8478 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8479 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8480 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8482 *total = COSTS_N_INSNS (2);
8483 return false;
8487 if (mode == DImode)
8489 *total = COSTS_N_INSNS (5);
8490 return false;
8493 if (CONST_INT_P (XEXP (x, 1)))
8495 /* If operand 1 is a constant we can more accurately
8496 calculate the cost of the multiply. The multiplier can
8497 retire 15 bits on the first cycle and a further 12 on the
8498 second. We do, of course, have to load the constant into
8499 a register first. */
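/* Worked example of the mask checks below: for 0x12345 the 0xffff8000
   mask is non-zero (one extra cycle) and the 0xf8000000 mask is zero,
   giving COSTS_N_INSNS (2) overall.  */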
8500 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8501 /* There's a general overhead of one cycle. */
8502 int cost = 1;
8503 unsigned HOST_WIDE_INT masked_const;
8505 if (i & 0x80000000)
8506 i = ~i;
8508 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8510 masked_const = i & 0xffff8000;
8511 if (masked_const != 0)
8513 cost++;
8514 masked_const = i & 0xf8000000;
8515 if (masked_const != 0)
8516 cost++;
8518 *total = COSTS_N_INSNS (cost);
8519 return false;
8522 if (mode == SImode)
8524 *total = COSTS_N_INSNS (3);
8525 return false;
8528 /* Requires a lib call */
8529 *total = COSTS_N_INSNS (20);
8530 return false;
8532 default:
8533 return arm_rtx_costs_1 (x, outer_code, total, speed);
8538 /* RTX costs for 9e (and later) cores. */
8540 static bool
8541 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8542 int *total, bool speed)
8544 enum machine_mode mode = GET_MODE (x);
8546 if (TARGET_THUMB1)
8548 switch (code)
8550 case MULT:
8551 *total = COSTS_N_INSNS (3);
8552 return true;
8554 default:
8555 *total = thumb1_rtx_costs (x, code, outer_code);
8556 return true;
8560 switch (code)
8562 case MULT:
8563 /* There is no point basing this on the tuning, since it is always the
8564 fast variant if it exists at all. */
8565 if (mode == DImode
8566 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8567 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8568 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8570 *total = COSTS_N_INSNS (2);
8571 return false;
8575 if (mode == DImode)
8577 *total = COSTS_N_INSNS (5);
8578 return false;
8581 if (mode == SImode)
8583 *total = COSTS_N_INSNS (2);
8584 return false;
8587 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8589 if (TARGET_HARD_FLOAT
8590 && (mode == SFmode
8591 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8593 *total = COSTS_N_INSNS (1);
8594 return false;
8598 *total = COSTS_N_INSNS (20);
8599 return false;
8601 default:
8602 return arm_rtx_costs_1 (x, outer_code, total, speed);
8605 /* All address computations that can be done are essentially free, but
8606 rtx_cost returns the same for practically all of them. So we weight the
8607 different types of address here in this order (most preferred first):
8608 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
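/* For example, with the weights below: (plus (reg) (const_int 4)) scores 2,
   (plus (reg) (mult (reg) (const_int 4))) scores 3, a bare (reg) scores 6,
   and a SYMBOL_REF scores 10.  */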
8609 static inline int
8610 arm_arm_address_cost (rtx x)
8612 enum rtx_code c = GET_CODE (x);
8614 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8615 return 0;
8616 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8617 return 10;
8619 if (c == PLUS)
8621 if (CONST_INT_P (XEXP (x, 1)))
8622 return 2;
8624 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8625 return 3;
8627 return 4;
8630 return 6;
8633 static inline int
8634 arm_thumb_address_cost (rtx x)
8636 enum rtx_code c = GET_CODE (x);
8638 if (c == REG)
8639 return 1;
8640 if (c == PLUS
8641 && REG_P (XEXP (x, 0))
8642 && CONST_INT_P (XEXP (x, 1)))
8643 return 1;
8645 return 2;
8648 static int
8649 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
8650 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
8652 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8655 /* Adjust cost hook for XScale. */
8656 static bool
8657 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8659 /* Some true dependencies can have a higher cost depending
8660 on precisely how certain input operands are used. */
8661 if (REG_NOTE_KIND(link) == 0
8662 && recog_memoized (insn) >= 0
8663 && recog_memoized (dep) >= 0)
8665 int shift_opnum = get_attr_shift (insn);
8666 enum attr_type attr_type = get_attr_type (dep);
8668 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8669 operand for INSN. If we have a shifted input operand and the
8670 instruction we depend on is another ALU instruction, then we may
8671 have to account for an additional stall. */
8672 if (shift_opnum != 0
8673 && (attr_type == TYPE_ARLO_SHIFT
8674 || attr_type == TYPE_ARLO_SHIFT_REG
8675 || attr_type == TYPE_MOV_SHIFT
8676 || attr_type == TYPE_MVN_SHIFT
8677 || attr_type == TYPE_MOV_SHIFT_REG
8678 || attr_type == TYPE_MVN_SHIFT_REG))
8680 rtx shifted_operand;
8681 int opno;
8683 /* Get the shifted operand. */
8684 extract_insn (insn);
8685 shifted_operand = recog_data.operand[shift_opnum];
8687 /* Iterate over all the operands in DEP. If we write an operand
8688 that overlaps with SHIFTED_OPERAND, then we have to increase the
8689 cost of this dependency. */
8690 extract_insn (dep);
8691 preprocess_constraints ();
8692 for (opno = 0; opno < recog_data.n_operands; opno++)
8694 /* We can ignore strict inputs. */
8695 if (recog_data.operand_type[opno] == OP_IN)
8696 continue;
8698 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8699 shifted_operand))
8701 *cost = 2;
8702 return false;
8707 return true;
8710 /* Adjust cost hook for Cortex A9. */
8711 static bool
8712 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8714 switch (REG_NOTE_KIND (link))
8716 case REG_DEP_ANTI:
8717 *cost = 0;
8718 return false;
8720 case REG_DEP_TRUE:
8721 case REG_DEP_OUTPUT:
8722 if (recog_memoized (insn) >= 0
8723 && recog_memoized (dep) >= 0)
8725 if (GET_CODE (PATTERN (insn)) == SET)
8727 if (GET_MODE_CLASS
8728 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8729 || GET_MODE_CLASS
8730 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8732 enum attr_type attr_type_insn = get_attr_type (insn);
8733 enum attr_type attr_type_dep = get_attr_type (dep);
8735 /* By default all dependencies of the form
8736 s0 = s0 <op> s1
8737 s0 = s0 <op> s2
8738 have an extra latency of 1 cycle because
8739 of the input and output dependency in this
8740 case. However this gets modeled as a true
8741 dependency and hence all these checks. */
8742 if (REG_P (SET_DEST (PATTERN (insn)))
8743 && REG_P (SET_DEST (PATTERN (dep)))
8744 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8745 SET_DEST (PATTERN (dep))))
8747 /* FMACS is a special case where the dependent
8748 instruction can be issued 3 cycles before
8749 the normal latency in case of an output
8750 dependency. */
8751 if ((attr_type_insn == TYPE_FMACS
8752 || attr_type_insn == TYPE_FMACD)
8753 && (attr_type_dep == TYPE_FMACS
8754 || attr_type_dep == TYPE_FMACD))
8756 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8757 *cost = insn_default_latency (dep) - 3;
8758 else
8759 *cost = insn_default_latency (dep);
8760 return false;
8762 else
8764 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8765 *cost = insn_default_latency (dep) + 1;
8766 else
8767 *cost = insn_default_latency (dep);
8769 return false;
8774 break;
8776 default:
8777 gcc_unreachable ();
8780 return true;
8783 /* Adjust cost hook for FA726TE. */
8784 static bool
8785 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8787 /* For FA726TE, a true dependency on the CPSR (i.e. a flag-setting insn followed
8788 by a predicated one) has a penalty of 3. */
8789 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8790 && recog_memoized (insn) >= 0
8791 && recog_memoized (dep) >= 0
8792 && get_attr_conds (dep) == CONDS_SET)
8794 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8795 if (get_attr_conds (insn) == CONDS_USE
8796 && get_attr_type (insn) != TYPE_BRANCH)
8798 *cost = 3;
8799 return false;
8802 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8803 || get_attr_conds (insn) == CONDS_USE)
8805 *cost = 0;
8806 return false;
8810 return true;
8813 /* Implement TARGET_REGISTER_MOVE_COST.
8815 A move between VFP_REGS and GENERAL_REGS is a single insn, but
8816 it is typically more expensive than a single memory access. We set
8817 the cost to less than two memory accesses so that floating
8818 point to integer conversion does not go through memory. */
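/* Concretely, with the numbers used here and in arm_memory_move_cost: a
   core<->VFP move costs 15 while a memory move costs 10 on 32-bit
   targets, so a single register move (15) still beats a store-plus-load
   round trip (20).  */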
8821 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8822 reg_class_t from, reg_class_t to)
8824 if (TARGET_32BIT)
8826 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8827 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8828 return 15;
8829 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8830 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8831 return 4;
8832 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8833 return 20;
8834 else
8835 return 2;
8837 else
8839 if (from == HI_REGS || to == HI_REGS)
8840 return 4;
8841 else
8842 return 2;
8846 /* Implement TARGET_MEMORY_MOVE_COST. */
8849 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8850 bool in ATTRIBUTE_UNUSED)
8852 if (TARGET_32BIT)
8853 return 10;
8854 else
8856 if (GET_MODE_SIZE (mode) < 4)
8857 return 8;
8858 else
8859 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
8863 /* Vectorizer cost model implementation. */
8865 /* Implement targetm.vectorize.builtin_vectorization_cost. */
8866 static int
8867 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8868 tree vectype,
8869 int misalign ATTRIBUTE_UNUSED)
8871 unsigned elements;
8873 switch (type_of_cost)
8875 case scalar_stmt:
8876 return current_tune->vec_costs->scalar_stmt_cost;
8878 case scalar_load:
8879 return current_tune->vec_costs->scalar_load_cost;
8881 case scalar_store:
8882 return current_tune->vec_costs->scalar_store_cost;
8884 case vector_stmt:
8885 return current_tune->vec_costs->vec_stmt_cost;
8887 case vector_load:
8888 return current_tune->vec_costs->vec_align_load_cost;
8890 case vector_store:
8891 return current_tune->vec_costs->vec_store_cost;
8893 case vec_to_scalar:
8894 return current_tune->vec_costs->vec_to_scalar_cost;
8896 case scalar_to_vec:
8897 return current_tune->vec_costs->scalar_to_vec_cost;
8899 case unaligned_load:
8900 return current_tune->vec_costs->vec_unalign_load_cost;
8902 case unaligned_store:
8903 return current_tune->vec_costs->vec_unalign_store_cost;
8905 case cond_branch_taken:
8906 return current_tune->vec_costs->cond_taken_branch_cost;
8908 case cond_branch_not_taken:
8909 return current_tune->vec_costs->cond_not_taken_branch_cost;
8911 case vec_perm:
8912 case vec_promote_demote:
8913 return current_tune->vec_costs->vec_stmt_cost;
8915 case vec_construct:
8916 elements = TYPE_VECTOR_SUBPARTS (vectype);
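/* E.g. a 4-element vector construct is costed as 4 / 2 + 1 = 3 by the
   return below.  */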
8917 return elements / 2 + 1;
8919 default:
8920 gcc_unreachable ();
8924 /* Implement targetm.vectorize.add_stmt_cost. */
8926 static unsigned
8927 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8928 struct _stmt_vec_info *stmt_info, int misalign,
8929 enum vect_cost_model_location where)
8931 unsigned *cost = (unsigned *) data;
8932 unsigned retval = 0;
8934 if (flag_vect_cost_model)
8936 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
8937 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
8939 /* Statements in an inner loop relative to the loop being
8940 vectorized are weighted more heavily. The value here is
8941 arbitrary and could potentially be improved with analysis. */
8942 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
8943 count *= 50; /* FIXME. */
8945 retval = (unsigned) (count * stmt_cost);
8946 cost[where] += retval;
8949 return retval;
8952 /* Return true if and only if this insn can dual-issue only as older. */
8953 static bool
8954 cortexa7_older_only (rtx insn)
8956 if (recog_memoized (insn) < 0)
8957 return false;
8959 switch (get_attr_type (insn))
8961 case TYPE_ARLO_REG:
8962 case TYPE_MVN_REG:
8963 case TYPE_SHIFT:
8964 case TYPE_SHIFT_REG:
8965 case TYPE_LOAD_BYTE:
8966 case TYPE_LOAD1:
8967 case TYPE_STORE1:
8968 case TYPE_FFARITHS:
8969 case TYPE_FADDS:
8970 case TYPE_FFARITHD:
8971 case TYPE_FADDD:
8972 case TYPE_FCPYS:
8973 case TYPE_F_CVT:
8974 case TYPE_FCMPS:
8975 case TYPE_FCMPD:
8976 case TYPE_FCONSTS:
8977 case TYPE_FCONSTD:
8978 case TYPE_FMULS:
8979 case TYPE_FMACS:
8980 case TYPE_FMULD:
8981 case TYPE_FMACD:
8982 case TYPE_FDIVS:
8983 case TYPE_FDIVD:
8984 case TYPE_F_2_R:
8985 case TYPE_F_FLAG:
8986 case TYPE_F_LOADS:
8987 case TYPE_F_STORES:
8988 return true;
8989 default:
8990 return false;
8994 /* Return true if and only if this insn can dual-issue as younger. */
8995 static bool
8996 cortexa7_younger (FILE *file, int verbose, rtx insn)
8998 if (recog_memoized (insn) < 0)
9000 if (verbose > 5)
9001 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
9002 return false;
9005 switch (get_attr_type (insn))
9007 case TYPE_ARLO_IMM:
9008 case TYPE_EXTEND:
9009 case TYPE_MVN_IMM:
9010 case TYPE_MOV_IMM:
9011 case TYPE_MOV_REG:
9012 case TYPE_MOV_SHIFT:
9013 case TYPE_MOV_SHIFT_REG:
9014 case TYPE_BRANCH:
9015 case TYPE_CALL:
9016 return true;
9017 default:
9018 return false;
9023 /* Look for an instruction that can dual issue only as an older
9024 instruction, and move it in front of any instructions that can
9025 dual-issue as younger, while preserving the relative order of all
9026 other instructions in the ready list. This is a heuristic to help
9027 dual-issue in later cycles, by postponing issue of more flexible
9028 instructions. This heuristic may affect dual issue opportunities
9029 in the current cycle. */
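/* Illustrative effect: if the head of the ready list is a flexible
   (younger-capable) ALU insn immediately followed by an older-only insn,
   the older-only insn is moved in front of it, keeping the flexible insn
   available to pair in a later cycle.  */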
9030 static void
9031 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
9032 int clock)
9034 int i;
9035 int first_older_only = -1, first_younger = -1;
9037 if (verbose > 5)
9038 fprintf (file,
9039 ";; sched_reorder for cycle %d with %d insns in ready list\n",
9040 clock,
9041 *n_readyp);
9043 /* Traverse the ready list from the head (the instruction to issue
9044 first), looking for the first instruction that can issue as
9045 younger and the first instruction that can dual-issue only as
9046 older. */
9047 for (i = *n_readyp - 1; i >= 0; i--)
9049 rtx insn = ready[i];
9050 if (cortexa7_older_only (insn))
9052 first_older_only = i;
9053 if (verbose > 5)
9054 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
9055 break;
9057 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
9058 first_younger = i;
9061 /* Nothing to reorder because either no younger insn was found or an
9062 insn that can dual-issue only as older appears before any insn that
9063 can dual-issue as younger. */
9064 if (first_younger == -1)
9066 if (verbose > 5)
9067 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
9068 return;
9071 /* Nothing to reorder because no older-only insn in the ready list. */
9072 if (first_older_only == -1)
9074 if (verbose > 5)
9075 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
9076 return;
9079 /* Move first_older_only insn before first_younger. */
9080 if (verbose > 5)
9081 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
9082 INSN_UID(ready [first_older_only]),
9083 INSN_UID(ready [first_younger]));
9084 rtx first_older_only_insn = ready [first_older_only];
9085 for (i = first_older_only; i < first_younger; i++)
9087 ready[i] = ready[i+1];
9090 ready[i] = first_older_only_insn;
9091 return;
9094 /* Implement TARGET_SCHED_REORDER. */
9095 static int
9096 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
9097 int clock)
9099 switch (arm_tune)
9101 case cortexa7:
9102 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
9103 break;
9104 default:
9105 /* Do nothing for other cores. */
9106 break;
9109 return arm_issue_rate ();
9112 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
9113 It corrects the value of COST based on the relationship between
9114 INSN and DEP through the dependence LINK. It returns the new
9115 value. There is a per-core adjust_cost hook to adjust scheduler costs
9116 and the per-core hook can choose to completely override the generic
9117 adjust_cost function. Only put bits of code into arm_adjust_cost that
9118 are common across all cores. */
9119 static int
9120 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
9122 rtx i_pat, d_pat;
9124 /* When generating Thumb-1 code, we want to place flag-setting operations
9125 close to a conditional branch which depends on them, so that we can
9126 omit the comparison. */
9127 if (TARGET_THUMB1
9128 && REG_NOTE_KIND (link) == 0
9129 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
9130 && recog_memoized (dep) >= 0
9131 && get_attr_conds (dep) == CONDS_SET)
9132 return 0;
9134 if (current_tune->sched_adjust_cost != NULL)
9136 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
9137 return cost;
9140 /* XXX Is this strictly true? */
9141 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9142 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
9143 return 0;
9145 /* Call insns don't incur a stall, even if they follow a load. */
9146 if (REG_NOTE_KIND (link) == 0
9147 && CALL_P (insn))
9148 return 1;
9150 if ((i_pat = single_set (insn)) != NULL
9151 && MEM_P (SET_SRC (i_pat))
9152 && (d_pat = single_set (dep)) != NULL
9153 && MEM_P (SET_DEST (d_pat)))
9155 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
9156 /* This is a load after a store; there is no conflict if the load reads
9157 from a cached area. Assume that loads from the stack and from the
9158 constant pool are cached, and that others will miss. This is a
9159 hack. */
9161 if ((GET_CODE (src_mem) == SYMBOL_REF
9162 && CONSTANT_POOL_ADDRESS_P (src_mem))
9163 || reg_mentioned_p (stack_pointer_rtx, src_mem)
9164 || reg_mentioned_p (frame_pointer_rtx, src_mem)
9165 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
9166 return 1;
9169 return cost;
9173 arm_max_conditional_execute (void)
9175 return max_insns_skipped;
9178 static int
9179 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
9181 if (TARGET_32BIT)
9182 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
9183 else
9184 return (optimize > 0) ? 2 : 0;
9187 static int
9188 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
9190 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
9193 static bool fp_consts_inited = false;
9195 static REAL_VALUE_TYPE value_fp0;
9197 static void
9198 init_fp_table (void)
9200 REAL_VALUE_TYPE r;
9202 r = REAL_VALUE_ATOF ("0", DFmode);
9203 value_fp0 = r;
9204 fp_consts_inited = true;
9207 /* Return TRUE if rtx X is a valid immediate FP constant. */
9209 arm_const_double_rtx (rtx x)
9211 REAL_VALUE_TYPE r;
9213 if (!fp_consts_inited)
9214 init_fp_table ();
9216 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9217 if (REAL_VALUE_MINUS_ZERO (r))
9218 return 0;
9220 if (REAL_VALUES_EQUAL (r, value_fp0))
9221 return 1;
9223 return 0;
9226 /* VFPv3 has a fairly wide range of representable immediates, formed from
9227 "quarter-precision" floating-point values. These can be evaluated using this
9228 formula (with ^ for exponentiation):
9230 -1^s * n * 2^-r
9232 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
9233 16 <= n <= 31 and 0 <= r <= 7.
9235 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
9237 - A (most-significant) is the sign bit.
9238 - BCD are the exponent (encoded as r XOR 3).
9239 - EFGH are the mantissa (encoded as n - 16).
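   For instance (a worked example of the mapping above): 1.0 = -1^0 * 16 * 2^-4
   (s = 0, n = 16, r = 4), which encodes as ABCDEFGH = 0 111 0000, i.e. 0x70.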
9242 /* Return an integer index for a VFPv3 immediate operand X suitable for the
9243 fconst[sd] instruction, or -1 if X isn't suitable. */
9244 static int
9245 vfp3_const_double_index (rtx x)
9247 REAL_VALUE_TYPE r, m;
9248 int sign, exponent;
9249 unsigned HOST_WIDE_INT mantissa, mant_hi;
9250 unsigned HOST_WIDE_INT mask;
9251 HOST_WIDE_INT m1, m2;
9252 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
9254 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
9255 return -1;
9257 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9259 /* We can't represent these things, so detect them first. */
9260 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
9261 return -1;
9263 /* Extract sign, exponent and mantissa. */
9264 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
9265 r = real_value_abs (&r);
9266 exponent = REAL_EXP (&r);
9267 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9268 highest (sign) bit, with a fixed binary point at bit point_pos.
9269 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
9270 bits for the mantissa, this may fail (low bits would be lost). */
9271 real_ldexp (&m, &r, point_pos - exponent);
9272 REAL_VALUE_TO_INT (&m1, &m2, m);
9273 mantissa = m1;
9274 mant_hi = m2;
9276 /* If there are bits set in the low part of the mantissa, we can't
9277 represent this value. */
9278 if (mantissa != 0)
9279 return -1;
9281 /* Now make it so that mantissa contains the most-significant bits, and move
9282 the point_pos to indicate that the least-significant bits have been
9283 discarded. */
9284 point_pos -= HOST_BITS_PER_WIDE_INT;
9285 mantissa = mant_hi;
9287 /* We can permit four significant bits of mantissa only, plus a high bit
9288 which is always 1. */
9289 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9290 if ((mantissa & mask) != 0)
9291 return -1;
9293 /* Now we know the mantissa is in range, chop off the unneeded bits. */
9294 mantissa >>= point_pos - 5;
9296 /* The mantissa may be zero. Disallow that case. (It's possible to load the
9297 floating-point immediate zero with Neon using an integer-zero load, but
9298 that case is handled elsewhere.) */
9299 if (mantissa == 0)
9300 return -1;
9302 gcc_assert (mantissa >= 16 && mantissa <= 31);
9304 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
9305 normalized significands are in the range [1, 2). (Our mantissa is shifted
9306 left 4 places at this point relative to normalized IEEE754 values). GCC
9307 internally uses [0.5, 1) (see real.c), so the exponent returned from
9308 REAL_EXP must be altered. */
9309 exponent = 5 - exponent;
9311 if (exponent < 0 || exponent > 7)
9312 return -1;
9314 /* Sign, mantissa and exponent are now in the correct form to plug into the
9315 formula described in the comment above. */
9316 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
9319 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
9321 vfp3_const_double_rtx (rtx x)
9323 if (!TARGET_VFP3)
9324 return 0;
9326 return vfp3_const_double_index (x) != -1;
9329 /* Recognize immediates which can be used in various Neon instructions. Legal
9330 immediates are described by the following table (for VMVN variants, the
9331 bitwise inverse of the constant shown is recognized. In either case, VMOV
9332 is output and the correct instruction to use for a given constant is chosen
9333 by the assembler). The constant shown is replicated across all elements of
9334 the destination vector.
9336 insn elems variant constant (binary)
9337 ---- ----- ------- -----------------
9338 vmov i32 0 00000000 00000000 00000000 abcdefgh
9339 vmov i32 1 00000000 00000000 abcdefgh 00000000
9340 vmov i32 2 00000000 abcdefgh 00000000 00000000
9341 vmov i32 3 abcdefgh 00000000 00000000 00000000
9342 vmov i16 4 00000000 abcdefgh
9343 vmov i16 5 abcdefgh 00000000
9344 vmvn i32 6 00000000 00000000 00000000 abcdefgh
9345 vmvn i32 7 00000000 00000000 abcdefgh 00000000
9346 vmvn i32 8 00000000 abcdefgh 00000000 00000000
9347 vmvn i32 9 abcdefgh 00000000 00000000 00000000
9348 vmvn i16 10 00000000 abcdefgh
9349 vmvn i16 11 abcdefgh 00000000
9350 vmov i32 12 00000000 00000000 abcdefgh 11111111
9351 vmvn i32 13 00000000 00000000 abcdefgh 11111111
9352 vmov i32 14 00000000 abcdefgh 11111111 11111111
9353 vmvn i32 15 00000000 abcdefgh 11111111 11111111
9354 vmov i8 16 abcdefgh
9355 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
9356 eeeeeeee ffffffff gggggggg hhhhhhhh
9357 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
9358 vmov f32 19 00000000 00000000 00000000 00000000
9360 For case 18, B = !b. Representable values are exactly those accepted by
9361 vfp3_const_double_index, but are output as floating-point numbers rather
9362 than indices.
9364 For case 19, we will change it to vmov.i32 when assembling.
9366 Variants 0-5 (inclusive) may also be used as immediates for the second
9367 operand of VORR/VBIC instructions.
9369 The INVERSE argument causes the bitwise inverse of the given operand to be
9370 recognized instead (used for recognizing legal immediates for the VAND/VORN
9371 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
9372 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
9373 output, rather than the real insns vbic/vorr).
9375 INVERSE makes no difference to the recognition of float vectors.
9377 The return value is the variant of immediate as shown in the above table, or
9378 -1 if the given value doesn't match any of the listed patterns.
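   As a rough worked example, a V4SImode vector with each element equal to
   0x000000ff matches variant 0 with element width 32, so the caller can
   emit e.g. "vmov.i32 q0, #0xff".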
9380 static int
9381 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
9382 rtx *modconst, int *elementwidth)
9384 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
9385 matches = 1; \
9386 for (i = 0; i < idx; i += (STRIDE)) \
9387 if (!(TEST)) \
9388 matches = 0; \
9389 if (matches) \
9391 immtype = (CLASS); \
9392 elsize = (ELSIZE); \
9393 break; \
9396 unsigned int i, elsize = 0, idx = 0, n_elts;
9397 unsigned int innersize;
9398 unsigned char bytes[16];
9399 int immtype = -1, matches;
9400 unsigned int invmask = inverse ? 0xff : 0;
9401 bool vector = GET_CODE (op) == CONST_VECTOR;
9403 if (vector)
9405 n_elts = CONST_VECTOR_NUNITS (op);
9406 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9408 else
9410 n_elts = 1;
9411 if (mode == VOIDmode)
9412 mode = DImode;
9413 innersize = GET_MODE_SIZE (mode);
9416 /* Vectors of float constants. */
9417 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
9419 rtx el0 = CONST_VECTOR_ELT (op, 0);
9420 REAL_VALUE_TYPE r0;
9422 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
9423 return -1;
9425 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
9427 for (i = 1; i < n_elts; i++)
9429 rtx elt = CONST_VECTOR_ELT (op, i);
9430 REAL_VALUE_TYPE re;
9432 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
9434 if (!REAL_VALUES_EQUAL (r0, re))
9435 return -1;
9438 if (modconst)
9439 *modconst = CONST_VECTOR_ELT (op, 0);
9441 if (elementwidth)
9442 *elementwidth = 0;
9444 if (el0 == CONST0_RTX (GET_MODE (el0)))
9445 return 19;
9446 else
9447 return 18;
9450 /* Splat vector constant out into a byte vector. */
9451 for (i = 0; i < n_elts; i++)
9453 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
9454 unsigned HOST_WIDE_INT elpart;
9455 unsigned int part, parts;
9457 if (CONST_INT_P (el))
9459 elpart = INTVAL (el);
9460 parts = 1;
9462 else if (CONST_DOUBLE_P (el))
9464 elpart = CONST_DOUBLE_LOW (el);
9465 parts = 2;
9467 else
9468 gcc_unreachable ();
9470 for (part = 0; part < parts; part++)
9472 unsigned int byte;
9473 for (byte = 0; byte < innersize; byte++)
9475 bytes[idx++] = (elpart & 0xff) ^ invmask;
9476 elpart >>= BITS_PER_UNIT;
9478 if (CONST_DOUBLE_P (el))
9479 elpart = CONST_DOUBLE_HIGH (el);
9483 /* Sanity check. */
9484 gcc_assert (idx == GET_MODE_SIZE (mode));
9488 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
9489 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9491 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9492 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9494 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
9495 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9497 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9498 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
9500 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
9502 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
9504 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9505 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9507 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9508 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9510 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9511 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9513 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9514 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
9516 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
9518 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
9520 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9521 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9523 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9524 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9526 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9527 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9529 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9530 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9532 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9534 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9535 && bytes[i] == bytes[(i + 8) % idx]);
9537 while (0);
9539 if (immtype == -1)
9540 return -1;
9542 if (elementwidth)
9543 *elementwidth = elsize;
9545 if (modconst)
9547 unsigned HOST_WIDE_INT imm = 0;
9549 /* Un-invert bytes of recognized vector, if necessary. */
9550 if (invmask != 0)
9551 for (i = 0; i < idx; i++)
9552 bytes[i] ^= invmask;
9554 if (immtype == 17)
9556 /* FIXME: Broken on 32-bit H_W_I hosts. */
9557 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9559 for (i = 0; i < 8; i++)
9560 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9561 << (i * BITS_PER_UNIT);
9563 *modconst = GEN_INT (imm);
9565 else
9567 unsigned HOST_WIDE_INT imm = 0;
9569 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9570 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9572 *modconst = GEN_INT (imm);
9576 return immtype;
9577 #undef CHECK
9580 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9581 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9582 float elements), and a modified constant (whatever should be output for a
9583 VMOV) in *MODCONST. */
9586 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9587 rtx *modconst, int *elementwidth)
9589 rtx tmpconst;
9590 int tmpwidth;
9591 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9593 if (retval == -1)
9594 return 0;
9596 if (modconst)
9597 *modconst = tmpconst;
9599 if (elementwidth)
9600 *elementwidth = tmpwidth;
9602 return 1;
9605 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9606 the immediate is valid, write a constant suitable for using as an operand
9607 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9608 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9611 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9612 rtx *modconst, int *elementwidth)
9614 rtx tmpconst;
9615 int tmpwidth;
9616 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9618 if (retval < 0 || retval > 5)
9619 return 0;
9621 if (modconst)
9622 *modconst = tmpconst;
9624 if (elementwidth)
9625 *elementwidth = tmpwidth;
9627 return 1;
9630 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9631 the immediate is valid, write a constant suitable for using as an operand
9632 to VSHR/VSHL to *MODCONST and the corresponding element width to
9633 *ELEMENTWIDTH. ISLEFTSHIFT distinguishes left shifts from right shifts,
9634 which have different immediate limitations. */
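/* For example, with V8QImode (8-bit elements) a valid left-shift immediate
   lies in the range 0..7, while a valid right-shift immediate lies in 1..8,
   matching the VSHL and VSHR encodings.  */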
9637 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9638 rtx *modconst, int *elementwidth,
9639 bool isleftshift)
9641 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9642 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9643 unsigned HOST_WIDE_INT last_elt = 0;
9644 unsigned HOST_WIDE_INT maxshift;
9646 /* Check that all elements of the vector constant are identical integers. */
9647 for (i = 0; i < n_elts; i++)
9649 rtx el = CONST_VECTOR_ELT (op, i);
9650 unsigned HOST_WIDE_INT elpart;
9652 if (CONST_INT_P (el))
9653 elpart = INTVAL (el);
9654 else if (CONST_DOUBLE_P (el))
9655 return 0;
9656 else
9657 gcc_unreachable ();
9659 if (i != 0 && elpart != last_elt)
9660 return 0;
9662 last_elt = elpart;
9665 /* Shift less than element size. */
9666 maxshift = innersize * 8;
9668 if (isleftshift)
9670 /* Left shift immediate value can be from 0 to <size>-1. */
9671 if (last_elt >= maxshift)
9672 return 0;
9674 else
9676 /* Right shift immediate value can be from 1 to <size>. */
9677 if (last_elt == 0 || last_elt > maxshift)
9678 return 0;
9681 if (elementwidth)
9682 *elementwidth = innersize * 8;
9684 if (modconst)
9685 *modconst = CONST_VECTOR_ELT (op, 0);
9687 return 1;
9690 /* Return a string suitable for output of Neon immediate logic operation
9691 MNEM. */
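/* For instance, if MNEM is "vorr" and OP2 encodes a valid 32-bit immediate,
   the quad-register template comes out roughly as "vorr.i32\t%q0, %2", with
   *OP2 rewritten to the encoded constant by neon_immediate_valid_for_logic.  */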
9693 char *
9694 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9695 int inverse, int quad)
9697 int width, is_valid;
9698 static char templ[40];
9700 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9702 gcc_assert (is_valid != 0);
9704 if (quad)
9705 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9706 else
9707 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9709 return templ;
9712 /* Return a string suitable for output of Neon immediate shift operation
9713 (VSHR or VSHL) MNEM. */
9715 char *
9716 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9717 enum machine_mode mode, int quad,
9718 bool isleftshift)
9720 int width, is_valid;
9721 static char templ[40];
9723 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9724 gcc_assert (is_valid != 0);
9726 if (quad)
9727 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9728 else
9729 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9731 return templ;
9734 /* Output a sequence of pairwise operations to implement a reduction.
9735 NOTE: We do "too much work" here, because pairwise operations work on two
9736 registers-worth of operands in one go. Unfortunately we can't exploit those
9737 extra calculations to do the full operation in fewer steps, I don't think.
9738 Although all vector elements of the result but the first are ignored, we
9739 actually calculate the same result in each of the elements. An alternative
9740 such as initially loading a vector with zero to use as each of the second
9741 operands would use up an additional register and take an extra instruction,
9742 for no particular gain. */
9744 void
9745 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9746 rtx (*reduc) (rtx, rtx, rtx))
9748 enum machine_mode inner = GET_MODE_INNER (mode);
9749 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9750 rtx tmpsum = op1;
9752 for (i = parts / 2; i >= 1; i /= 2)
9754 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9755 emit_insn (reduc (dest, tmpsum, tmpsum));
9756 tmpsum = dest;
9760 /* If VALS is a vector constant that can be loaded into a register
9761 using VDUP, generate instructions to do so and return an RTX to
9762 assign to the register. Otherwise return NULL_RTX. */
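/* For example, a V4SImode constant with all four elements equal to 5 can be
   handled by moving 5 into a core register and returning
   (vec_duplicate:V4SI (reg)), which the move patterns emit as a vdup.32.  */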
9764 static rtx
9765 neon_vdup_constant (rtx vals)
9767 enum machine_mode mode = GET_MODE (vals);
9768 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9769 int n_elts = GET_MODE_NUNITS (mode);
9770 bool all_same = true;
9771 rtx x;
9772 int i;
9774 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9775 return NULL_RTX;
9777 for (i = 0; i < n_elts; ++i)
9779 x = XVECEXP (vals, 0, i);
9780 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9781 all_same = false;
9784 if (!all_same)
9785 /* The elements are not all the same. We could handle repeating
9786 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9787 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9788 vdup.i16). */
9789 return NULL_RTX;
9791 /* We can load this constant by using VDUP and a constant in a
9792 single ARM register. This will be cheaper than a vector
9793 load. */
9795 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9796 return gen_rtx_VEC_DUPLICATE (mode, x);
9799 /* Generate code to load VALS, which is a PARALLEL containing only
9800 constants (for vec_init) or CONST_VECTOR, efficiently into a
9801 register. Returns an RTX to copy into the register, or NULL_RTX
9802 for a PARALLEL that can not be converted into a CONST_VECTOR. */
9805 neon_make_constant (rtx vals)
9807 enum machine_mode mode = GET_MODE (vals);
9808 rtx target;
9809 rtx const_vec = NULL_RTX;
9810 int n_elts = GET_MODE_NUNITS (mode);
9811 int n_const = 0;
9812 int i;
9814 if (GET_CODE (vals) == CONST_VECTOR)
9815 const_vec = vals;
9816 else if (GET_CODE (vals) == PARALLEL)
9818 /* A CONST_VECTOR must contain only CONST_INTs and
9819 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9820 Only store valid constants in a CONST_VECTOR. */
9821 for (i = 0; i < n_elts; ++i)
9823 rtx x = XVECEXP (vals, 0, i);
9824 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9825 n_const++;
9827 if (n_const == n_elts)
9828 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9830 else
9831 gcc_unreachable ();
9833 if (const_vec != NULL
9834 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9835 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9836 return const_vec;
9837 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9838 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9839 pipeline cycle; creating the constant takes one or two ARM
9840 pipeline cycles. */
9841 return target;
9842 else if (const_vec != NULL_RTX)
9843 /* Load from constant pool. On Cortex-A8 this takes two cycles
9844 (for either double or quad vectors). We can not take advantage
9845 of single-cycle VLD1 because we need a PC-relative addressing
9846 mode. */
9847 return const_vec;
9848 else
9849 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9850 We can not construct an initializer. */
9851 return NULL_RTX;
9854 /* Initialize vector TARGET to VALS. */
9856 void
9857 neon_expand_vector_init (rtx target, rtx vals)
9859 enum machine_mode mode = GET_MODE (target);
9860 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9861 int n_elts = GET_MODE_NUNITS (mode);
9862 int n_var = 0, one_var = -1;
9863 bool all_same = true;
9864 rtx x, mem;
9865 int i;
9867 for (i = 0; i < n_elts; ++i)
9869 x = XVECEXP (vals, 0, i);
9870 if (!CONSTANT_P (x))
9871 ++n_var, one_var = i;
9873 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9874 all_same = false;
9877 if (n_var == 0)
9879 rtx constant = neon_make_constant (vals);
9880 if (constant != NULL_RTX)
9882 emit_move_insn (target, constant);
9883 return;
9887 /* Splat a single non-constant element if we can. */
9888 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9890 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9891 emit_insn (gen_rtx_SET (VOIDmode, target,
9892 gen_rtx_VEC_DUPLICATE (mode, x)));
9893 return;
9896 /* One field is non-constant. Load constant then overwrite varying
9897 field. This is more efficient than using the stack. */
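/* For instance, to initialize { x, 1, 2, 3 } where only X is non-constant,
   we first build { 1, 1, 2, 3 } (the varying element is filled with its
   neighbouring constant) and then insert X into element 0 with the matching
   vset_lane pattern below.  */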
9898 if (n_var == 1)
9900 rtx copy = copy_rtx (vals);
9901 rtx index = GEN_INT (one_var);
9903 /* Load constant part of vector, substitute neighboring value for
9904 varying element. */
9905 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9906 neon_expand_vector_init (target, copy);
9908 /* Insert variable. */
9909 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9910 switch (mode)
9912 case V8QImode:
9913 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9914 break;
9915 case V16QImode:
9916 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9917 break;
9918 case V4HImode:
9919 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9920 break;
9921 case V8HImode:
9922 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9923 break;
9924 case V2SImode:
9925 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9926 break;
9927 case V4SImode:
9928 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9929 break;
9930 case V2SFmode:
9931 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9932 break;
9933 case V4SFmode:
9934 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9935 break;
9936 case V2DImode:
9937 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9938 break;
9939 default:
9940 gcc_unreachable ();
9942 return;
9945 /* Construct the vector in memory one field at a time
9946 and load the whole vector. */
9947 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9948 for (i = 0; i < n_elts; i++)
9949 emit_move_insn (adjust_address_nv (mem, inner_mode,
9950 i * GET_MODE_SIZE (inner_mode)),
9951 XVECEXP (vals, 0, i));
9952 emit_move_insn (target, mem);
9955 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9956 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9957 reported source locations are bogus. */
9959 static void
9960 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9961 const char *err)
9963 HOST_WIDE_INT lane;
9965 gcc_assert (CONST_INT_P (operand));
9967 lane = INTVAL (operand);
9969 if (lane < low || lane >= high)
9970 error (err);
9973 /* Bounds-check lanes. */
9975 void
9976 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9978 bounds_check (operand, low, high, "lane out of range");
9981 /* Bounds-check constants. */
9983 void
9984 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9986 bounds_check (operand, low, high, "constant out of range");
9989 HOST_WIDE_INT
9990 neon_element_bits (enum machine_mode mode)
9992 if (mode == DImode)
9993 return GET_MODE_BITSIZE (mode);
9994 else
9995 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9999 /* Predicates for `match_operand' and `match_operator'. */
10001 /* Return TRUE if OP is a valid coprocessor memory address pattern.
10002 WB is true if full writeback address modes are allowed and is false
10003 if limited writeback address modes (POST_INC and PRE_DEC) are
10004 allowed. */
10007 arm_coproc_mem_operand (rtx op, bool wb)
10009 rtx ind;
10011 /* Reject eliminable registers. */
10012 if (! (reload_in_progress || reload_completed)
10013 && ( reg_mentioned_p (frame_pointer_rtx, op)
10014 || reg_mentioned_p (arg_pointer_rtx, op)
10015 || reg_mentioned_p (virtual_incoming_args_rtx, op)
10016 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10017 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10018 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10019 return FALSE;
10021 /* Constants are converted into offsets from labels. */
10022 if (!MEM_P (op))
10023 return FALSE;
10025 ind = XEXP (op, 0);
10027 if (reload_completed
10028 && (GET_CODE (ind) == LABEL_REF
10029 || (GET_CODE (ind) == CONST
10030 && GET_CODE (XEXP (ind, 0)) == PLUS
10031 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10032 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10033 return TRUE;
10035 /* Match: (mem (reg)). */
10036 if (REG_P (ind))
10037 return arm_address_register_rtx_p (ind, 0);
10039 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
10040 acceptable in any case (subject to verification by
10041 arm_address_register_rtx_p). We need WB to be true to accept
10042 PRE_INC and POST_DEC. */
10043 if (GET_CODE (ind) == POST_INC
10044 || GET_CODE (ind) == PRE_DEC
10045 || (wb
10046 && (GET_CODE (ind) == PRE_INC
10047 || GET_CODE (ind) == POST_DEC)))
10048 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10050 if (wb
10051 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
10052 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
10053 && GET_CODE (XEXP (ind, 1)) == PLUS
10054 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
10055 ind = XEXP (ind, 1);
10057 /* Match:
10058 (plus (reg)
10059 (const)). */
10060 if (GET_CODE (ind) == PLUS
10061 && REG_P (XEXP (ind, 0))
10062 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
10063 && CONST_INT_P (XEXP (ind, 1))
10064 && INTVAL (XEXP (ind, 1)) > -1024
10065 && INTVAL (XEXP (ind, 1)) < 1024
10066 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
10067 return TRUE;
10069 return FALSE;
10072 /* Return TRUE if OP is a memory operand which we can load or store a vector
10073 to/from. TYPE is one of the following values:
10074 0 - Vector load/store (vldr)
10075 1 - Core registers (ldm)
10076 2 - Element/structure loads (vld1)
10079 neon_vector_mem_operand (rtx op, int type)
10081 rtx ind;
10083 /* Reject eliminable registers. */
10084 if (! (reload_in_progress || reload_completed)
10085 && ( reg_mentioned_p (frame_pointer_rtx, op)
10086 || reg_mentioned_p (arg_pointer_rtx, op)
10087 || reg_mentioned_p (virtual_incoming_args_rtx, op)
10088 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10089 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10090 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10091 return FALSE;
10093 /* Constants are converted into offsets from labels. */
10094 if (!MEM_P (op))
10095 return FALSE;
10097 ind = XEXP (op, 0);
10099 if (reload_completed
10100 && (GET_CODE (ind) == LABEL_REF
10101 || (GET_CODE (ind) == CONST
10102 && GET_CODE (XEXP (ind, 0)) == PLUS
10103 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10104 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10105 return TRUE;
10107 /* Match: (mem (reg)). */
10108 if (REG_P (ind))
10109 return arm_address_register_rtx_p (ind, 0);
10111 /* Allow post-increment with Neon registers. */
10112 if ((type != 1 && GET_CODE (ind) == POST_INC)
10113 || (type == 0 && GET_CODE (ind) == PRE_DEC))
10114 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10116 /* FIXME: vld1 allows register post-modify. */
10118 /* Match:
10119 (plus (reg)
10120 (const)). */
10121 if (type == 0
10122 && GET_CODE (ind) == PLUS
10123 && REG_P (XEXP (ind, 0))
10124 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
10125 && CONST_INT_P (XEXP (ind, 1))
10126 && INTVAL (XEXP (ind, 1)) > -1024
10127 /* For quad modes, we restrict the constant offset to be slightly less
10128 than what the instruction format permits. We have no such constraint
10129 on double mode offsets. (This must match arm_legitimate_index_p.) */
10130 && (INTVAL (XEXP (ind, 1))
10131 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
10132 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
10133 return TRUE;
10135 return FALSE;
10138 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
10139 type. */
10141 neon_struct_mem_operand (rtx op)
10143 rtx ind;
10145 /* Reject eliminable registers. */
10146 if (! (reload_in_progress || reload_completed)
10147 && ( reg_mentioned_p (frame_pointer_rtx, op)
10148 || reg_mentioned_p (arg_pointer_rtx, op)
10149 || reg_mentioned_p (virtual_incoming_args_rtx, op)
10150 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
10151 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
10152 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
10153 return FALSE;
10155 /* Constants are converted into offsets from labels. */
10156 if (!MEM_P (op))
10157 return FALSE;
10159 ind = XEXP (op, 0);
10161 if (reload_completed
10162 && (GET_CODE (ind) == LABEL_REF
10163 || (GET_CODE (ind) == CONST
10164 && GET_CODE (XEXP (ind, 0)) == PLUS
10165 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
10166 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
10167 return TRUE;
10169 /* Match: (mem (reg)). */
10170 if (REG_P (ind))
10171 return arm_address_register_rtx_p (ind, 0);
10173 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
10174 if (GET_CODE (ind) == POST_INC
10175 || GET_CODE (ind) == PRE_DEC)
10176 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
10178 return FALSE;
10181 /* Return true if X is a register that will be eliminated later on. */
10183 arm_eliminable_register (rtx x)
10185 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
10186 || REGNO (x) == ARG_POINTER_REGNUM
10187 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
10188 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
10191 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
10192 coprocessor registers. Otherwise return NO_REGS. */
10194 enum reg_class
10195 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
10197 if (mode == HFmode)
10199 if (!TARGET_NEON_FP16)
10200 return GENERAL_REGS;
10201 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
10202 return NO_REGS;
10203 return GENERAL_REGS;
10206 /* The neon move patterns handle all legitimate vector and struct
10207 addresses. */
10208 if (TARGET_NEON
10209 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
10210 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
10211 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
10212 || VALID_NEON_STRUCT_MODE (mode)))
10213 return NO_REGS;
10215 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
10216 return NO_REGS;
10218 return GENERAL_REGS;
10221 /* Values which must be returned in the most-significant end of the return
10222 register. */
10224 static bool
10225 arm_return_in_msb (const_tree valtype)
10227 return (TARGET_AAPCS_BASED
10228 && BYTES_BIG_ENDIAN
10229 && (AGGREGATE_TYPE_P (valtype)
10230 || TREE_CODE (valtype) == COMPLEX_TYPE
10231 || FIXED_POINT_TYPE_P (valtype)));
10234 /* Return TRUE if X references a SYMBOL_REF. */
10236 symbol_mentioned_p (rtx x)
10238 const char * fmt;
10239 int i;
10241 if (GET_CODE (x) == SYMBOL_REF)
10242 return 1;
10244 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
10245 are constant offsets, not symbols. */
10246 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10247 return 0;
10249 fmt = GET_RTX_FORMAT (GET_CODE (x));
10251 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10253 if (fmt[i] == 'E')
10255 int j;
10257 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10258 if (symbol_mentioned_p (XVECEXP (x, i, j)))
10259 return 1;
10261 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
10262 return 1;
10265 return 0;
10268 /* Return TRUE if X references a LABEL_REF. */
10270 label_mentioned_p (rtx x)
10272 const char * fmt;
10273 int i;
10275 if (GET_CODE (x) == LABEL_REF)
10276 return 1;
10278 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
10279 instruction, but they are constant offsets, not symbols. */
10280 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
10281 return 0;
10283 fmt = GET_RTX_FORMAT (GET_CODE (x));
10284 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10286 if (fmt[i] == 'E')
10288 int j;
10290 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10291 if (label_mentioned_p (XVECEXP (x, i, j)))
10292 return 1;
10294 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
10295 return 1;
10298 return 0;
10302 tls_mentioned_p (rtx x)
10304 switch (GET_CODE (x))
10306 case CONST:
10307 return tls_mentioned_p (XEXP (x, 0));
10309 case UNSPEC:
10310 if (XINT (x, 1) == UNSPEC_TLS)
10311 return 1;
10313 default:
10314 return 0;
10318 /* Must not copy any rtx that uses a pc-relative address. */
10320 static int
10321 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
10323 if (GET_CODE (*x) == UNSPEC
10324 && (XINT (*x, 1) == UNSPEC_PIC_BASE
10325 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
10326 return 1;
10327 return 0;
10330 static bool
10331 arm_cannot_copy_insn_p (rtx insn)
10333 /* The tls call insn cannot be copied, as it is paired with a data
10334 word. */
10335 if (recog_memoized (insn) == CODE_FOR_tlscall)
10336 return true;
10338 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
10341 enum rtx_code
10342 minmax_code (rtx x)
10344 enum rtx_code code = GET_CODE (x);
10346 switch (code)
10348 case SMAX:
10349 return GE;
10350 case SMIN:
10351 return LE;
10352 case UMIN:
10353 return LEU;
10354 case UMAX:
10355 return GEU;
10356 default:
10357 gcc_unreachable ();
10361 /* Match pair of min/max operators that can be implemented via usat/ssat. */
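/* For example, the bounds [0, 255] give *MASK = 8 and *SIGNED_SAT = false
   (the range of USAT #8), while the bounds [-256, 255] give *MASK = 9 and
   *SIGNED_SAT = true (the range of SSAT #9).  */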
10363 bool
10364 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
10365 int *mask, bool *signed_sat)
10367 /* The high bound must be a power of two minus one. */
10368 int log = exact_log2 (INTVAL (hi_bound) + 1);
10369 if (log == -1)
10370 return false;
10372 /* The low bound is either zero (for usat) or one less than the
10373 negation of the high bound (for ssat). */
10374 if (INTVAL (lo_bound) == 0)
10376 if (mask)
10377 *mask = log;
10378 if (signed_sat)
10379 *signed_sat = false;
10381 return true;
10384 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
10386 if (mask)
10387 *mask = log + 1;
10388 if (signed_sat)
10389 *signed_sat = true;
10391 return true;
10394 return false;
10397 /* Return 1 if memory locations are adjacent. */
10399 adjacent_mem_locations (rtx a, rtx b)
10401 /* We don't guarantee to preserve the order of these memory refs. */
10402 if (volatile_refs_p (a) || volatile_refs_p (b))
10403 return 0;
10405 if ((REG_P (XEXP (a, 0))
10406 || (GET_CODE (XEXP (a, 0)) == PLUS
10407 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
10408 && (REG_P (XEXP (b, 0))
10409 || (GET_CODE (XEXP (b, 0)) == PLUS
10410 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
10412 HOST_WIDE_INT val0 = 0, val1 = 0;
10413 rtx reg0, reg1;
10414 int val_diff;
10416 if (GET_CODE (XEXP (a, 0)) == PLUS)
10418 reg0 = XEXP (XEXP (a, 0), 0);
10419 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
10421 else
10422 reg0 = XEXP (a, 0);
10424 if (GET_CODE (XEXP (b, 0)) == PLUS)
10426 reg1 = XEXP (XEXP (b, 0), 0);
10427 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
10429 else
10430 reg1 = XEXP (b, 0);
10432 /* Don't accept any offset that will require multiple
10433 instructions to handle, since this would cause the
10434 arith_adjacentmem pattern to output an overlong sequence. */
10435 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
10436 return 0;
10438 /* Don't allow an eliminable register: register elimination can make
10439 the offset too large. */
10440 if (arm_eliminable_register (reg0))
10441 return 0;
10443 val_diff = val1 - val0;
10445 if (arm_ld_sched)
10447 /* If the target has load delay slots, then there's no benefit
10448 to using an ldm instruction unless the offset is zero and
10449 we are optimizing for size. */
10450 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
10451 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
10452 && (val_diff == 4 || val_diff == -4));
10455 return ((REGNO (reg0) == REGNO (reg1))
10456 && (val_diff == 4 || val_diff == -4));
10459 return 0;
10462 /* Return true if OP is a valid load or store multiple operation. LOAD is true
10463 for load operations, false for store operations. CONSECUTIVE is true
10464 if the register numbers in the operation must be consecutive in the register
10465 bank. RETURN_PC is true if the value is to be loaded into the PC.
10466 The pattern we are trying to match for load is:
10467 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
10468 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
10471 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
10473 where
10474 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
10475 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
10476 3. If consecutive is TRUE, then for kth register being loaded,
10477 REGNO (R_dk) = REGNO (R_d0) + k.
10478 The pattern for store is similar. */
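/* As a sketched example, an LDM of { r4, r5 } from the address in Rbase
   looks like
     [(SET (R4) (MEM (addr)))
      (SET (R5) (MEM (PLUS (addr) (4))))]
   and is accepted here with LOAD true and MODE SImode; Thumb-1 additionally
   requires write-back or the base register in the list, as checked at the
   end of the function.  */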
10479 bool
10480 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
10481 bool consecutive, bool return_pc)
10483 HOST_WIDE_INT count = XVECLEN (op, 0);
10484 rtx reg, mem, addr;
10485 unsigned regno;
10486 unsigned first_regno;
10487 HOST_WIDE_INT i = 1, base = 0, offset = 0;
10488 rtx elt;
10489 bool addr_reg_in_reglist = false;
10490 bool update = false;
10491 int reg_increment;
10492 int offset_adj;
10493 int regs_per_val;
10495 /* If not in SImode, then registers must be consecutive
10496 (e.g., VLDM instructions for DFmode). */
10497 gcc_assert ((mode == SImode) || consecutive);
10498 /* Setting return_pc for stores is illegal. */
10499 gcc_assert (!return_pc || load);
10501 /* Set up the increments and the regs per val based on the mode. */
10502 reg_increment = GET_MODE_SIZE (mode);
10503 regs_per_val = reg_increment / 4;
10504 offset_adj = return_pc ? 1 : 0;
10506 if (count <= 1
10507 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
10508 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
10509 return false;
10511 /* Check if this is a write-back. */
10512 elt = XVECEXP (op, 0, offset_adj);
10513 if (GET_CODE (SET_SRC (elt)) == PLUS)
10515 i++;
10516 base = 1;
10517 update = true;
10519 /* The offset adjustment must be the number of registers being
10520 popped times the size of a single register. */
10521 if (!REG_P (SET_DEST (elt))
10522 || !REG_P (XEXP (SET_SRC (elt), 0))
10523 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
10524 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
10525 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
10526 ((count - 1 - offset_adj) * reg_increment))
10527 return false;
10530 i = i + offset_adj;
10531 base = base + offset_adj;
10532 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10533 success depends on the type: VLDM can do just one reg,
10534 LDM must do at least two. */
10535 if ((count <= i) && (mode == SImode))
10536 return false;
10538 elt = XVECEXP (op, 0, i - 1);
10539 if (GET_CODE (elt) != SET)
10540 return false;
10542 if (load)
10544 reg = SET_DEST (elt);
10545 mem = SET_SRC (elt);
10547 else
10549 reg = SET_SRC (elt);
10550 mem = SET_DEST (elt);
10553 if (!REG_P (reg) || !MEM_P (mem))
10554 return false;
10556 regno = REGNO (reg);
10557 first_regno = regno;
10558 addr = XEXP (mem, 0);
10559 if (GET_CODE (addr) == PLUS)
10561 if (!CONST_INT_P (XEXP (addr, 1)))
10562 return false;
10564 offset = INTVAL (XEXP (addr, 1));
10565 addr = XEXP (addr, 0);
10568 if (!REG_P (addr))
10569 return false;
10571 /* Don't allow SP to be loaded unless it is also the base register. It
10572 guarantees that SP is reset correctly when an LDM instruction
10573 is interrupted. Otherwise, we might end up with a corrupt stack. */
10574 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10575 return false;
10577 for (; i < count; i++)
10579 elt = XVECEXP (op, 0, i);
10580 if (GET_CODE (elt) != SET)
10581 return false;
10583 if (load)
10585 reg = SET_DEST (elt);
10586 mem = SET_SRC (elt);
10588 else
10590 reg = SET_SRC (elt);
10591 mem = SET_DEST (elt);
10594 if (!REG_P (reg)
10595 || GET_MODE (reg) != mode
10596 || REGNO (reg) <= regno
10597 || (consecutive
10598 && (REGNO (reg) !=
10599 (unsigned int) (first_regno + regs_per_val * (i - base))))
10600 /* Don't allow SP to be loaded unless it is also the base register. It
10601 guarantees that SP is reset correctly when an LDM instruction
10602 is interrupted. Otherwise, we might end up with a corrupt stack. */
10603 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10604 || !MEM_P (mem)
10605 || GET_MODE (mem) != mode
10606 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10607 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10608 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10609 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10610 offset + (i - base) * reg_increment))
10611 && (!REG_P (XEXP (mem, 0))
10612 || offset + (i - base) * reg_increment != 0)))
10613 return false;
10615 regno = REGNO (reg);
10616 if (regno == REGNO (addr))
10617 addr_reg_in_reglist = true;
10620 if (load)
10622 if (update && addr_reg_in_reglist)
10623 return false;
10625 /* For Thumb-1, the address register is always modified - either by write-back
10626 or by explicit load. If the pattern does not describe an update,
10627 then the address register must be in the list of loaded registers. */
10628 if (TARGET_THUMB1)
10629 return update || addr_reg_in_reglist;
10632 return true;
10635 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10636 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10637 instruction. ADD_OFFSET is nonzero if the base address register needs
10638 to be modified with an add instruction before we can use it. */
10640 static bool
10641 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10642 int nops, HOST_WIDE_INT add_offset)
10644 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10645 if the offset isn't small enough. The reason 2 ldrs are faster
10646 is because these ARMs are able to do more than one cache access
10647 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10648 whilst the ARM8 has a double bandwidth cache. This means that
10649 these cores can do both an instruction fetch and a data fetch in
10650 a single cycle, so the trick of calculating the address into a
10651 scratch register (one of the result regs) and then doing a load
10652 multiple actually becomes slower (and no smaller in code size).
10653 That is the transformation
10655 ldr rd1, [rbase + offset]
10656 ldr rd2, [rbase + offset + 4]

to

10660 add rd1, rbase, offset
10661 ldmia rd1, {rd1, rd2}
10663 produces worse code -- '3 cycles + any stalls on rd2' instead of
10664 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10665 access per cycle, the first sequence could never complete in less
10666 than 6 cycles, whereas the ldm sequence would only take 5 and
10667 would make better use of sequential accesses if not hitting the
10668 cache.
10670 We cheat here and test 'arm_ld_sched' which we currently know to
10671 only be true for the ARM8, ARM9 and StrongARM. If this ever
10672 changes, then the test below needs to be reworked. */
10673 if (nops == 2 && arm_ld_sched && add_offset != 0)
10674 return false;
10676 /* XScale has load-store double instructions, but they have stricter
10677 alignment requirements than load-store multiple, so we cannot
10678 use them.
10680 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10681 the pipeline until completion.
10683 NREGS CYCLES
1 3
2 4
3 5
4 6
10689 An ldr instruction takes 1-3 cycles, but does not block the
10690 pipeline.
10692 NREGS CYCLES
10693 1 1-3
10694 2 2-6
10695 3 3-9
10696 4 4-12
10698 Best case ldr will always win. However, the more ldr instructions
10699 we issue, the less likely we are to be able to schedule them well.
10700 Using ldr instructions also increases code size.
10702 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10703 for counts of 3 or 4 regs. */
10704 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10705 return false;
10706 return true;
10709 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10710 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10711 an array ORDER which describes the sequence to use when accessing the
10712 offsets that produces an ascending order. In this sequence, each
10713 offset must be larger by exactly 4 than the previous one. ORDER[0]
10714 must have been filled in with the lowest offset by the caller.
10715 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10716 we use to verify that ORDER produces an ascending order of registers.
10717 Return true if it was possible to construct such an order, false if
10718 not. */
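/* For instance, UNSORTED_OFFSETS = { 8, 0, 4 } with ORDER[0] = 1 (the index
   of the lowest offset) yields ORDER = { 1, 2, 0 }; if any offset in the
   ascending chain is missing or duplicated, the function returns false.  */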
10720 static bool
10721 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10722 int *unsorted_regs)
10724 int i;
10725 for (i = 1; i < nops; i++)
10727 int j;
10729 order[i] = order[i - 1];
10730 for (j = 0; j < nops; j++)
10731 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10733 /* We must find exactly one offset that is higher than the
10734 previous one by 4. */
10735 if (order[i] != order[i - 1])
10736 return false;
10737 order[i] = j;
10739 if (order[i] == order[i - 1])
10740 return false;
10741 /* The register numbers must be ascending. */
10742 if (unsorted_regs != NULL
10743 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10744 return false;
10746 return true;
10749 /* Used to determine in a peephole whether a sequence of load
10750 instructions can be changed into a load-multiple instruction.
10751 NOPS is the number of separate load instructions we are examining. The
10752 first NOPS entries in OPERANDS are the destination registers, the
10753 next NOPS entries are memory operands. If this function is
10754 successful, *BASE is set to the common base register of the memory
10755 accesses; *LOAD_OFFSET is set to the first memory location's offset
10756 from that base register.
10757 REGS is an array filled in with the destination register numbers.
10758 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
10759 insn numbers to an ascending order of stores. If CHECK_REGS is true,
10760 the sequence of registers in REGS matches the loads from ascending memory
10761 locations, and the function verifies that the register numbers are
10762 themselves ascending. If CHECK_REGS is false, the register numbers
10763 are stored in the order they are found in the operands. */
10764 static int
10765 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10766 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10768 int unsorted_regs[MAX_LDM_STM_OPS];
10769 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10770 int order[MAX_LDM_STM_OPS];
10771 rtx base_reg_rtx = NULL;
10772 int base_reg = -1;
10773 int i, ldm_case;
10775 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10776 easily extended if required. */
10777 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10779 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10781 /* Loop over the operands and check that the memory references are
10782 suitable (i.e. immediate offsets from the same base register). At
10783 the same time, extract the target register, and the memory
10784 offsets. */
10785 for (i = 0; i < nops; i++)
10787 rtx reg;
10788 rtx offset;
10790 /* Convert a subreg of a mem into the mem itself. */
10791 if (GET_CODE (operands[nops + i]) == SUBREG)
10792 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10794 gcc_assert (MEM_P (operands[nops + i]));
10796 /* Don't reorder volatile memory references; it doesn't seem worth
10797 looking for the case where the order is ok anyway. */
10798 if (MEM_VOLATILE_P (operands[nops + i]))
10799 return 0;
10801 offset = const0_rtx;
10803 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10804 || (GET_CODE (reg) == SUBREG
10805 && REG_P (reg = SUBREG_REG (reg))))
10806 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10807 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10808 || (GET_CODE (reg) == SUBREG
10809 && REG_P (reg = SUBREG_REG (reg))))
10810 && (CONST_INT_P (offset
10811 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10813 if (i == 0)
10815 base_reg = REGNO (reg);
10816 base_reg_rtx = reg;
10817 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10818 return 0;
10820 else if (base_reg != (int) REGNO (reg))
10821 /* Not addressed from the same base register. */
10822 return 0;
10824 unsorted_regs[i] = (REG_P (operands[i])
10825 ? REGNO (operands[i])
10826 : REGNO (SUBREG_REG (operands[i])));
10828 /* If it isn't an integer register, or if it overwrites the
10829 base register but isn't the last insn in the list, then
10830 we can't do this. */
10831 if (unsorted_regs[i] < 0
10832 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10833 || unsorted_regs[i] > 14
10834 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10835 return 0;
10837 /* Don't allow SP to be loaded unless it is also the base
10838 register. It guarantees that SP is reset correctly when
10839 an LDM instruction is interrupted. Otherwise, we might
10840 end up with a corrupt stack. */
10841 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
10842 return 0;
10844 unsorted_offsets[i] = INTVAL (offset);
10845 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10846 order[0] = i;
10848 else
10849 /* Not a suitable memory address. */
10850 return 0;
10853 /* All the useful information has now been extracted from the
10854 operands into unsorted_regs and unsorted_offsets; additionally,
10855 order[0] has been set to the lowest offset in the list. Sort
10856 the offsets into order, verifying that they are adjacent, and
10857 check that the register numbers are ascending. */
10858 if (!compute_offset_order (nops, unsorted_offsets, order,
10859 check_regs ? unsorted_regs : NULL))
10860 return 0;
10862 if (saved_order)
10863 memcpy (saved_order, order, sizeof order);
10865 if (base)
10867 *base = base_reg;
10869 for (i = 0; i < nops; i++)
10870 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10872 *load_offset = unsorted_offsets[order[0]];
10875 if (TARGET_THUMB1
10876 && !peep2_reg_dead_p (nops, base_reg_rtx))
10877 return 0;
10879 if (unsorted_offsets[order[0]] == 0)
10880 ldm_case = 1; /* ldmia */
10881 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10882 ldm_case = 2; /* ldmib */
10883 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10884 ldm_case = 3; /* ldmda */
10885 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10886 ldm_case = 4; /* ldmdb */
10887 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10888 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10889 ldm_case = 5;
10890 else
10891 return 0;
10893 if (!multiple_operation_profitable_p (false, nops,
10894 ldm_case == 5
10895 ? unsorted_offsets[order[0]] : 0))
10896 return 0;
10898 return ldm_case;
10901 /* Used to determine in a peephole whether a sequence of store instructions can
10902 be changed into a store-multiple instruction.
10903 NOPS is the number of separate store instructions we are examining.
10904 NOPS_TOTAL is the total number of instructions recognized by the peephole
10905 pattern.
10906 The first NOPS entries in OPERANDS are the source registers, the next
10907 NOPS entries are memory operands. If this function is successful, *BASE is
10908 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10909 to the first memory location's offset from that base register. REGS is an
10910 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10911 likewise filled with the corresponding rtx's.
10912 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
10913 numbers to an ascending order of stores.
10914 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10915 from ascending memory locations, and the function verifies that the register
10916 numbers are themselves ascending. If CHECK_REGS is false, the register
10917 numbers are stored in the order they are found in the operands. */
10918 static int
10919 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10920 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10921 HOST_WIDE_INT *load_offset, bool check_regs)
10923 int unsorted_regs[MAX_LDM_STM_OPS];
10924 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10925 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10926 int order[MAX_LDM_STM_OPS];
10927 int base_reg = -1;
10928 rtx base_reg_rtx = NULL;
10929 int i, stm_case;
10931 /* Write back of base register is currently only supported for Thumb 1. */
10932 int base_writeback = TARGET_THUMB1;
10934 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10935 easily extended if required. */
10936 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10938 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10940 /* Loop over the operands and check that the memory references are
10941 suitable (i.e. immediate offsets from the same base register). At
10942 the same time, extract the target register, and the memory
10943 offsets. */
10944 for (i = 0; i < nops; i++)
10946 rtx reg;
10947 rtx offset;
10949 /* Convert a subreg of a mem into the mem itself. */
10950 if (GET_CODE (operands[nops + i]) == SUBREG)
10951 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10953 gcc_assert (MEM_P (operands[nops + i]));
10955 /* Don't reorder volatile memory references; it doesn't seem worth
10956 looking for the case where the order is ok anyway. */
10957 if (MEM_VOLATILE_P (operands[nops + i]))
10958 return 0;
10960 offset = const0_rtx;
10962 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10963 || (GET_CODE (reg) == SUBREG
10964 && REG_P (reg = SUBREG_REG (reg))))
10965 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10966 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10967 || (GET_CODE (reg) == SUBREG
10968 && REG_P (reg = SUBREG_REG (reg))))
10969 && (CONST_INT_P (offset
10970 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10972 unsorted_reg_rtxs[i] = (REG_P (operands[i])
10973 ? operands[i] : SUBREG_REG (operands[i]));
10974 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10976 if (i == 0)
10978 base_reg = REGNO (reg);
10979 base_reg_rtx = reg;
10980 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10981 return 0;
10983 else if (base_reg != (int) REGNO (reg))
10984 /* Not addressed from the same base register. */
10985 return 0;
10987 /* If it isn't an integer register, then we can't do this. */
10988 if (unsorted_regs[i] < 0
10989 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10990 /* The effects are unpredictable if the base register is
10991 both updated and stored. */
10992 || (base_writeback && unsorted_regs[i] == base_reg)
10993 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10994 || unsorted_regs[i] > 14)
10995 return 0;
10997 unsorted_offsets[i] = INTVAL (offset);
10998 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10999 order[0] = i;
11001 else
11002 /* Not a suitable memory address. */
11003 return 0;
11006 /* All the useful information has now been extracted from the
11007 operands into unsorted_regs and unsorted_offsets; additionally,
11008 order[0] has been set to the lowest offset in the list. Sort
11009 the offsets into order, verifying that they are adjacent, and
11010 check that the register numbers are ascending. */
11011 if (!compute_offset_order (nops, unsorted_offsets, order,
11012 check_regs ? unsorted_regs : NULL))
11013 return 0;
11015 if (saved_order)
11016 memcpy (saved_order, order, sizeof order);
11018 if (base)
11020 *base = base_reg;
11022 for (i = 0; i < nops; i++)
11024 regs[i] = unsorted_regs[check_regs ? order[i] : i];
11025 if (reg_rtxs)
11026 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
11029 *load_offset = unsorted_offsets[order[0]];
11032 if (TARGET_THUMB1
11033 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
11034 return 0;
11036 if (unsorted_offsets[order[0]] == 0)
11037 stm_case = 1; /* stmia */
11038 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
11039 stm_case = 2; /* stmib */
11040 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
11041 stm_case = 3; /* stmda */
11042 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
11043 stm_case = 4; /* stmdb */
11044 else
11045 return 0;
11047 if (!multiple_operation_profitable_p (false, nops, 0))
11048 return 0;
11050 return stm_case;
11053 /* Routines for use in generating RTL. */
11055 /* Generate a load-multiple instruction. COUNT is the number of loads in
11056 the instruction; REGS and MEMS are arrays containing the operands.
11057 BASEREG is the base register to be used in addressing the memory operands.
11058 WBACK_OFFSET is nonzero if the instruction should update the base
11059 register. */
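/* For example, with COUNT == 2, REGS == { 4, 5 } and WBACK_OFFSET == 8,
   the result is roughly
     (parallel [(set (basereg) (plus (basereg) (const_int 8)))
                (set (reg:SI 4) (mems[0]))
                (set (reg:SI 5) (mems[1]))])
   unless an LDM is judged unprofitable here, in which case an equivalent
   sequence of single loads is returned instead.  */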
11061 static rtx
11062 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
11063 HOST_WIDE_INT wback_offset)
11065 int i = 0, j;
11066 rtx result;
11068 if (!multiple_operation_profitable_p (false, count, 0))
11070 rtx seq;
11072 start_sequence ();
11074 for (i = 0; i < count; i++)
11075 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
11077 if (wback_offset != 0)
11078 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
11080 seq = get_insns ();
11081 end_sequence ();
11083 return seq;
11086 result = gen_rtx_PARALLEL (VOIDmode,
11087 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
11088 if (wback_offset != 0)
11090 XVECEXP (result, 0, 0)
11091 = gen_rtx_SET (VOIDmode, basereg,
11092 plus_constant (Pmode, basereg, wback_offset));
11093 i = 1;
11094 count++;
11097 for (j = 0; i < count; i++, j++)
11098 XVECEXP (result, 0, i)
11099 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
11101 return result;
11104 /* Generate a store-multiple instruction. COUNT is the number of stores in
11105 the instruction; REGS and MEMS are arrays containing the operands.
11106 BASEREG is the base register to be used in addressing the memory operands.
11107 WBACK_OFFSET is nonzero if the instruction should update the base
11108 register. */
11110 static rtx
11111 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
11112 HOST_WIDE_INT wback_offset)
11114 int i = 0, j;
11115 rtx result;
11117 if (GET_CODE (basereg) == PLUS)
11118 basereg = XEXP (basereg, 0);
11120 if (!multiple_operation_profitable_p (false, count, 0))
11122 rtx seq;
11124 start_sequence ();
11126 for (i = 0; i < count; i++)
11127 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
11129 if (wback_offset != 0)
11130 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
11132 seq = get_insns ();
11133 end_sequence ();
11135 return seq;
11138 result = gen_rtx_PARALLEL (VOIDmode,
11139 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
11140 if (wback_offset != 0)
11142 XVECEXP (result, 0, 0)
11143 = gen_rtx_SET (VOIDmode, basereg,
11144 plus_constant (Pmode, basereg, wback_offset));
11145 i = 1;
11146 count++;
11149 for (j = 0; i < count; i++, j++)
11150 XVECEXP (result, 0, i)
11151 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
11153 return result;
11156 /* Generate either a load-multiple or a store-multiple instruction. This
11157 function can be used in situations where we can start with a single MEM
11158 rtx and adjust its address upwards.
11159 COUNT is the number of operations in the instruction, not counting a
11160 possible update of the base register. REGS is an array containing the
11161 register operands.
11162 BASEREG is the base register to be used in addressing the memory operands,
11163 which are constructed from BASEMEM.
11164 WRITE_BACK specifies whether the generated instruction should include an
11165 update of the base register.
11166 OFFSETP is used to pass an offset to and from this function; this offset
11167 is not used when constructing the address (instead BASEMEM should have an
11168 appropriate offset in its address), it is used only for setting
11169 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
11171 static rtx
11172 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
11173 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
11175 rtx mems[MAX_LDM_STM_OPS];
11176 HOST_WIDE_INT offset = *offsetp;
11177 int i;
11179 gcc_assert (count <= MAX_LDM_STM_OPS);
11181 if (GET_CODE (basereg) == PLUS)
11182 basereg = XEXP (basereg, 0);
11184 for (i = 0; i < count; i++)
11186 rtx addr = plus_constant (Pmode, basereg, i * 4);
11187 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
11188 offset += 4;
11191 if (write_back)
11192 *offsetp = offset;
11194 if (is_load)
11195 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
11196 write_back ? 4 * count : 0);
11197 else
11198 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
11199 write_back ? 4 * count : 0);
11203 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
11204 rtx basemem, HOST_WIDE_INT *offsetp)
11206 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
11207 offsetp);
11211 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
11212 rtx basemem, HOST_WIDE_INT *offsetp)
11214 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
11215 offsetp);
11218 /* Called from a peephole2 expander to turn a sequence of loads into an
11219 LDM instruction. OPERANDS are the operands found by the peephole matcher;
11220 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
11221 is true if we can reorder the registers because they are used commutatively
11222 subsequently.
11223 Returns true iff we could generate a new instruction. */
11225 bool
11226 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
11228 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11229 rtx mems[MAX_LDM_STM_OPS];
11230 int i, j, base_reg;
11231 rtx base_reg_rtx;
11232 HOST_WIDE_INT offset;
11233 int write_back = FALSE;
11234 int ldm_case;
11235 rtx addr;
11237 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
11238 &base_reg, &offset, !sort_regs);
11240 if (ldm_case == 0)
11241 return false;
11243 if (sort_regs)
11244 for (i = 0; i < nops - 1; i++)
11245 for (j = i + 1; j < nops; j++)
11246 if (regs[i] > regs[j])
11248 int t = regs[i];
11249 regs[i] = regs[j];
11250 regs[j] = t;
11252 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11254 if (TARGET_THUMB1)
11256 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
11257 gcc_assert (ldm_case == 1 || ldm_case == 5);
11258 write_back = TRUE;
11261 if (ldm_case == 5)
11263 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
11264 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
11265 offset = 0;
11266 if (!TARGET_THUMB1)
11268 base_reg = regs[0];
11269 base_reg_rtx = newbase;
11273 for (i = 0; i < nops; i++)
11275 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11276 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11277 SImode, addr, 0);
11279 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
11280 write_back ? offset + i * 4 : 0));
11281 return true;
11284 /* Called from a peephole2 expander to turn a sequence of stores into an
11285 STM instruction. OPERANDS are the operands found by the peephole matcher;
11286 NOPS indicates how many separate stores we are trying to combine.
11287 Returns true iff we could generate a new instruction. */
11289 bool
11290 gen_stm_seq (rtx *operands, int nops)
11292 int i;
11293 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11294 rtx mems[MAX_LDM_STM_OPS];
11295 int base_reg;
11296 rtx base_reg_rtx;
11297 HOST_WIDE_INT offset;
11298 int write_back = FALSE;
11299 int stm_case;
11300 rtx addr;
11301 bool base_reg_dies;
11303 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
11304 mem_order, &base_reg, &offset, true);
11306 if (stm_case == 0)
11307 return false;
11309 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11311 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
11312 if (TARGET_THUMB1)
11314 gcc_assert (base_reg_dies);
11315 write_back = TRUE;
11318 if (stm_case == 5)
11320 gcc_assert (base_reg_dies);
11321 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11322 offset = 0;
11325 addr = plus_constant (Pmode, base_reg_rtx, offset);
11327 for (i = 0; i < nops; i++)
11329 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11330 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11331 SImode, addr, 0);
11333 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
11334 write_back ? offset + i * 4 : 0));
11335 return true;
11338 /* Called from a peephole2 expander to turn a sequence of stores that are
11339 preceded by constant loads into an STM instruction. OPERANDS are the
11340 operands found by the peephole matcher; NOPS indicates how many
11341 separate stores we are trying to combine; there are 2 * NOPS
11342 instructions in the peephole.
11343 Returns true iff we could generate a new instruction. */
11345 bool
11346 gen_const_stm_seq (rtx *operands, int nops)
11348 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
11349 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
11350 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
11351 rtx mems[MAX_LDM_STM_OPS];
11352 int base_reg;
11353 rtx base_reg_rtx;
11354 HOST_WIDE_INT offset;
11355 int write_back = FALSE;
11356 int stm_case;
11357 rtx addr;
11358 bool base_reg_dies;
11359 int i, j;
11360 HARD_REG_SET allocated;
11362 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
11363 mem_order, &base_reg, &offset, false);
11365 if (stm_case == 0)
11366 return false;
11368 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
11370 /* If the same register is used more than once, try to find a free
11371 register. */
11372 CLEAR_HARD_REG_SET (allocated);
11373 for (i = 0; i < nops; i++)
11375 for (j = i + 1; j < nops; j++)
11376 if (regs[i] == regs[j])
11378 rtx t = peep2_find_free_register (0, nops * 2,
11379 TARGET_THUMB1 ? "l" : "r",
11380 SImode, &allocated);
11381 if (t == NULL_RTX)
11382 return false;
11383 reg_rtxs[i] = t;
11384 regs[i] = REGNO (t);
11388 /* Compute an ordering that maps the register numbers to an ascending
11389 sequence. */
11390 reg_order[0] = 0;
11391 for (i = 0; i < nops; i++)
11392 if (regs[i] < regs[reg_order[0]])
11393 reg_order[0] = i;
11395 for (i = 1; i < nops; i++)
11397 int this_order = reg_order[i - 1];
11398 for (j = 0; j < nops; j++)
11399 if (regs[j] > regs[reg_order[i - 1]]
11400 && (this_order == reg_order[i - 1]
11401 || regs[j] < regs[this_order]))
11402 this_order = j;
11403 reg_order[i] = this_order;
11406 /* Ensure that registers that must be live after the instruction end
11407 up with the correct value. */
11408 for (i = 0; i < nops; i++)
11410 int this_order = reg_order[i];
11411 if ((this_order != mem_order[i]
11412 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
11413 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
11414 return false;
11417 /* Load the constants. */
11418 for (i = 0; i < nops; i++)
11420 rtx op = operands[2 * nops + mem_order[i]];
11421 sorted_regs[i] = regs[reg_order[i]];
11422 emit_move_insn (reg_rtxs[reg_order[i]], op);
11425 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
11427 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
11428 if (TARGET_THUMB1)
11430 gcc_assert (base_reg_dies);
11431 write_back = TRUE;
11434 if (stm_case == 5)
11436 gcc_assert (base_reg_dies);
11437 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
11438 offset = 0;
11441 addr = plus_constant (Pmode, base_reg_rtx, offset);
11443 for (i = 0; i < nops; i++)
11445 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
11446 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
11447 SImode, addr, 0);
11449 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
11450 write_back ? offset + i * 4 : 0));
11451 return true;
11454 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
11455 unaligned copies on processors which support unaligned semantics for those
11456 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
11457 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
11458 An interleave factor of 1 (the minimum) will perform no interleaving.
11459 Load/store multiple are used for aligned addresses where possible. */
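/* Editorial worked example (not from the original sources): copying 7 bytes
   with INTERLEAVE_FACTOR == 1 and unaligned operands expands roughly to

       ldr  r0, [src]          @ one whole word
       str  r0, [dst]
       ldrh r1, [src, #4]      @ trailing halfword, stored immediately
       strh r1, [dst, #4]
       ldrb r2, [src, #6]      @ trailing byte
       strb r2, [dst, #6]

   With a larger interleave factor the trailing halfword/byte stores are
   deferred until after the corresponding loads.  Register numbers are
   purely illustrative.  */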
11461 static void
11462 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
11463 HOST_WIDE_INT length,
11464 unsigned int interleave_factor)
11466 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
11467 int *regnos = XALLOCAVEC (int, interleave_factor);
11468 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
11469 HOST_WIDE_INT i, j;
11470 HOST_WIDE_INT remaining = length, words;
11471 rtx halfword_tmp = NULL, byte_tmp = NULL;
11472 rtx dst, src;
11473 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
11474 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
11475 HOST_WIDE_INT srcoffset, dstoffset;
11476 HOST_WIDE_INT src_autoinc, dst_autoinc;
11477 rtx mem, addr;
11479 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
11481 /* Use hard registers if we have aligned source or destination so we can use
11482 load/store multiple with contiguous registers. */
11483 if (dst_aligned || src_aligned)
11484 for (i = 0; i < interleave_factor; i++)
11485 regs[i] = gen_rtx_REG (SImode, i);
11486 else
11487 for (i = 0; i < interleave_factor; i++)
11488 regs[i] = gen_reg_rtx (SImode);
11490 dst = copy_addr_to_reg (XEXP (dstbase, 0));
11491 src = copy_addr_to_reg (XEXP (srcbase, 0));
11493 srcoffset = dstoffset = 0;
11495 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
11496 For copying the last bytes we want to subtract this offset again. */
11497 src_autoinc = dst_autoinc = 0;
11499 for (i = 0; i < interleave_factor; i++)
11500 regnos[i] = i;
11502 /* Copy BLOCK_SIZE_BYTES chunks. */
11504 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
11506 /* Load words. */
11507 if (src_aligned && interleave_factor > 1)
11509 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
11510 TRUE, srcbase, &srcoffset));
11511 src_autoinc += UNITS_PER_WORD * interleave_factor;
11513 else
11515 for (j = 0; j < interleave_factor; j++)
11517 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
11518 - src_autoinc));
11519 mem = adjust_automodify_address (srcbase, SImode, addr,
11520 srcoffset + j * UNITS_PER_WORD);
11521 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11523 srcoffset += block_size_bytes;
11526 /* Store words. */
11527 if (dst_aligned && interleave_factor > 1)
11529 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
11530 TRUE, dstbase, &dstoffset));
11531 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11533 else
11535 for (j = 0; j < interleave_factor; j++)
11537 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
11538 - dst_autoinc));
11539 mem = adjust_automodify_address (dstbase, SImode, addr,
11540 dstoffset + j * UNITS_PER_WORD);
11541 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11543 dstoffset += block_size_bytes;
11546 remaining -= block_size_bytes;
11549 /* Copy any whole words left (note these aren't interleaved with any
11550 subsequent halfword/byte load/stores in the interests of simplicity). */
11552 words = remaining / UNITS_PER_WORD;
11554 gcc_assert (words < interleave_factor);
11556 if (src_aligned && words > 1)
11558 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11559 &srcoffset));
11560 src_autoinc += UNITS_PER_WORD * words;
11562 else
11564 for (j = 0; j < words; j++)
11566 addr = plus_constant (Pmode, src,
11567 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11568 mem = adjust_automodify_address (srcbase, SImode, addr,
11569 srcoffset + j * UNITS_PER_WORD);
11570 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11572 srcoffset += words * UNITS_PER_WORD;
11575 if (dst_aligned && words > 1)
11577 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11578 &dstoffset));
11579 dst_autoinc += words * UNITS_PER_WORD;
11581 else
11583 for (j = 0; j < words; j++)
11585 addr = plus_constant (Pmode, dst,
11586 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11587 mem = adjust_automodify_address (dstbase, SImode, addr,
11588 dstoffset + j * UNITS_PER_WORD);
11589 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11591 dstoffset += words * UNITS_PER_WORD;
11594 remaining -= words * UNITS_PER_WORD;
11596 gcc_assert (remaining < 4);
11598 /* Copy a halfword if necessary. */
11600 if (remaining >= 2)
11602 halfword_tmp = gen_reg_rtx (SImode);
11604 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11605 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11606 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11608 /* Either write out immediately, or delay until we've loaded the last
11609 byte, depending on interleave factor. */
11610 if (interleave_factor == 1)
11612 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11613 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11614 emit_insn (gen_unaligned_storehi (mem,
11615 gen_lowpart (HImode, halfword_tmp)));
11616 halfword_tmp = NULL;
11617 dstoffset += 2;
11620 remaining -= 2;
11621 srcoffset += 2;
11624 gcc_assert (remaining < 2);
11626 /* Copy last byte. */
11628 if ((remaining & 1) != 0)
11630 byte_tmp = gen_reg_rtx (SImode);
11632 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11633 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11634 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11636 if (interleave_factor == 1)
11638 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11639 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11640 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11641 byte_tmp = NULL;
11642 dstoffset++;
11645 remaining--;
11646 srcoffset++;
11649 /* Store last halfword if we haven't done so already. */
11651 if (halfword_tmp)
11653 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11654 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11655 emit_insn (gen_unaligned_storehi (mem,
11656 gen_lowpart (HImode, halfword_tmp)));
11657 dstoffset += 2;
11660 /* Likewise for last byte. */
11662 if (byte_tmp)
11664 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11665 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11666 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11667 dstoffset++;
11670 gcc_assert (remaining == 0 && srcoffset == dstoffset);
11673 /* From mips_adjust_block_mem:
11675 Helper function for doing a loop-based block operation on memory
11676 reference MEM. Each iteration of the loop will operate on LENGTH
11677 bytes of MEM.
11679 Create a new base register for use within the loop and point it to
11680 the start of MEM. Create a new memory reference that uses this
11681 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11683 static void
11684 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11685 rtx *loop_mem)
11687 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11689 /* Although the new mem does not refer to a known location,
11690 it does keep up to LENGTH bytes of alignment. */
11691 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11692 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11695 /* From mips_block_move_loop:
11697 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11698 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11699 the memory regions do not overlap. */
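/* Editorial sketch of the code this function emits (register names are
   illustrative):

       loop:
         <straight-line copy of BYTES_PER_ITER bytes>
         add  src_reg, src_reg, #BYTES_PER_ITER
         add  dst_reg, dst_reg, #BYTES_PER_ITER
         cmp  src_reg, final_src
         bne  loop
       <straight-line copy of the LEFTOVER bytes, if any>

   where final_src is the original source address plus LENGTH - LEFTOVER.  */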
11701 static void
11702 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11703 unsigned int interleave_factor,
11704 HOST_WIDE_INT bytes_per_iter)
11706 rtx label, src_reg, dest_reg, final_src, test;
11707 HOST_WIDE_INT leftover;
11709 leftover = length % bytes_per_iter;
11710 length -= leftover;
11712 /* Create registers and memory references for use within the loop. */
11713 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11714 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11716 /* Calculate the value that SRC_REG should have after the last iteration of
11717 the loop. */
11718 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11719 0, 0, OPTAB_WIDEN);
11721 /* Emit the start of the loop. */
11722 label = gen_label_rtx ();
11723 emit_label (label);
11725 /* Emit the loop body. */
11726 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11727 interleave_factor);
11729 /* Move on to the next block. */
11730 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11731 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11733 /* Emit the loop condition. */
11734 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11735 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11737 /* Mop up any left-over bytes. */
11738 if (leftover)
11739 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11742 /* Emit a block move when either the source or destination is unaligned (not
11743 aligned to a four-byte boundary). This may need further tuning depending on
11744 core type, optimize_size setting, etc. */
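/* Editorial summary of the heuristics below: when optimizing for size the
   interleave factor is kept small (2 if either operand is word-aligned,
   otherwise 1) and a loop is only used for copies of more than 12 bytes;
   otherwise an interleave factor of 4 is used, with a loop moving 16 bytes
   per iteration once the copy exceeds 32 bytes.  */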
11746 static int
11747 arm_movmemqi_unaligned (rtx *operands)
11749 HOST_WIDE_INT length = INTVAL (operands[2]);
11751 if (optimize_size)
11753 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11754 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11755 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11756 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11757 or dst_aligned though: allow more interleaving in those cases since the
11758 resulting code can be smaller. */
11759 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11760 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11762 if (length > 12)
11763 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11764 interleave_factor, bytes_per_iter);
11765 else
11766 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11767 interleave_factor);
11769 else
11771 /* Note that the loop created by arm_block_move_unaligned_loop may be
11772 subject to loop unrolling, which makes tuning this condition a little
11773 redundant. */
11774 if (length > 32)
11775 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11776 else
11777 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11780 return 1;
11784 arm_gen_movmemqi (rtx *operands)
11786 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11787 HOST_WIDE_INT srcoffset, dstoffset;
11788 int i;
11789 rtx src, dst, srcbase, dstbase;
11790 rtx part_bytes_reg = NULL;
11791 rtx mem;
11793 if (!CONST_INT_P (operands[2])
11794 || !CONST_INT_P (operands[3])
11795 || INTVAL (operands[2]) > 64)
11796 return 0;
11798 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11799 return arm_movmemqi_unaligned (operands);
11801 if (INTVAL (operands[3]) & 3)
11802 return 0;
11804 dstbase = operands[0];
11805 srcbase = operands[1];
11807 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11808 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11810 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11811 out_words_to_go = INTVAL (operands[2]) / 4;
11812 last_bytes = INTVAL (operands[2]) & 3;
11813 dstoffset = srcoffset = 0;
11815 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11816 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11818 for (i = 0; in_words_to_go >= 2; i+=4)
11820 if (in_words_to_go > 4)
11821 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11822 TRUE, srcbase, &srcoffset));
11823 else
11824 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11825 src, FALSE, srcbase,
11826 &srcoffset));
11828 if (out_words_to_go)
11830 if (out_words_to_go > 4)
11831 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11832 TRUE, dstbase, &dstoffset));
11833 else if (out_words_to_go != 1)
11834 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11835 out_words_to_go, dst,
11836 (last_bytes == 0
11837 ? FALSE : TRUE),
11838 dstbase, &dstoffset));
11839 else
11841 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11842 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11843 if (last_bytes != 0)
11845 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11846 dstoffset += 4;
11851 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11852 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11855 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11856 if (out_words_to_go)
11858 rtx sreg;
11860 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11861 sreg = copy_to_reg (mem);
11863 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11864 emit_move_insn (mem, sreg);
11865 in_words_to_go--;
11867 gcc_assert (!in_words_to_go); /* Sanity check */
11870 if (in_words_to_go)
11872 gcc_assert (in_words_to_go > 0);
11874 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11875 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11878 gcc_assert (!last_bytes || part_bytes_reg);
11880 if (BYTES_BIG_ENDIAN && last_bytes)
11882 rtx tmp = gen_reg_rtx (SImode);
11884 /* The bytes we want are in the top end of the word. */
11885 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11886 GEN_INT (8 * (4 - last_bytes))));
11887 part_bytes_reg = tmp;
11889 while (last_bytes)
11891 mem = adjust_automodify_address (dstbase, QImode,
11892 plus_constant (Pmode, dst,
11893 last_bytes - 1),
11894 dstoffset + last_bytes - 1);
11895 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11897 if (--last_bytes)
11899 tmp = gen_reg_rtx (SImode);
11900 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11901 part_bytes_reg = tmp;
11906 else
11908 if (last_bytes > 1)
11910 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11911 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11912 last_bytes -= 2;
11913 if (last_bytes)
11915 rtx tmp = gen_reg_rtx (SImode);
11916 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11917 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11918 part_bytes_reg = tmp;
11919 dstoffset += 2;
11923 if (last_bytes)
11925 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11926 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11930 return 1;
11933 /* Helper for gen_movmem_ldrd_strd.  Return a copy of memory rtx MEM whose
11934 address has been advanced by the size of MEM's mode. */
11935 inline static rtx
11936 next_consecutive_mem (rtx mem)
11938 enum machine_mode mode = GET_MODE (mem);
11939 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
11940 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
11942 return adjust_automodify_address (mem, mode, addr, offset);
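/* For example (editorial note): given a DImode MEM at [r0] this returns a
   DImode MEM at [r0, #8]; an HImode MEM is advanced by 2, and so on.  The
   attributes of the original reference are carried over by
   adjust_automodify_address.  */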
11945 /* Copy using LDRD/STRD instructions whenever possible.
11946 Returns true upon success. */
11947 bool
11948 gen_movmem_ldrd_strd (rtx *operands)
11950 unsigned HOST_WIDE_INT len;
11951 HOST_WIDE_INT align;
11952 rtx src, dst, base;
11953 rtx reg0;
11954 bool src_aligned, dst_aligned;
11955 bool src_volatile, dst_volatile;
11957 gcc_assert (CONST_INT_P (operands[2]));
11958 gcc_assert (CONST_INT_P (operands[3]));
11960 len = UINTVAL (operands[2]);
11961 if (len > 64)
11962 return false;
11964 /* Maximum alignment we can assume for both src and dst buffers. */
11965 align = INTVAL (operands[3]);
11967 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
11968 return false;
11970 /* Place src and dst addresses in registers
11971 and update the corresponding mem rtx. */
11972 dst = operands[0];
11973 dst_volatile = MEM_VOLATILE_P (dst);
11974 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
11975 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
11976 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
11978 src = operands[1];
11979 src_volatile = MEM_VOLATILE_P (src);
11980 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
11981 base = copy_to_mode_reg (SImode, XEXP (src, 0));
11982 src = adjust_automodify_address (src, VOIDmode, base, 0);
11984 if (!unaligned_access && !(src_aligned && dst_aligned))
11985 return false;
11987 if (src_volatile || dst_volatile)
11988 return false;
11990 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
11991 if (!(dst_aligned || src_aligned))
11992 return arm_gen_movmemqi (operands);
11994 src = adjust_address (src, DImode, 0);
11995 dst = adjust_address (dst, DImode, 0);
11996 while (len >= 8)
11998 len -= 8;
11999 reg0 = gen_reg_rtx (DImode);
12000 if (src_aligned)
12001 emit_move_insn (reg0, src);
12002 else
12003 emit_insn (gen_unaligned_loaddi (reg0, src));
12005 if (dst_aligned)
12006 emit_move_insn (dst, reg0);
12007 else
12008 emit_insn (gen_unaligned_storedi (dst, reg0));
12010 src = next_consecutive_mem (src);
12011 dst = next_consecutive_mem (dst);
12014 gcc_assert (len < 8);
12015 if (len >= 4)
12017 /* More than a word but less than a double-word to copy. Copy a word. */
12018 reg0 = gen_reg_rtx (SImode);
12019 src = adjust_address (src, SImode, 0);
12020 dst = adjust_address (dst, SImode, 0);
12021 if (src_aligned)
12022 emit_move_insn (reg0, src);
12023 else
12024 emit_insn (gen_unaligned_loadsi (reg0, src));
12026 if (dst_aligned)
12027 emit_move_insn (dst, reg0);
12028 else
12029 emit_insn (gen_unaligned_storesi (dst, reg0));
12031 src = next_consecutive_mem (src);
12032 dst = next_consecutive_mem (dst);
12033 len -= 4;
12036 if (len == 0)
12037 return true;
12039 /* Copy the remaining bytes. */
12040 if (len >= 2)
12042 dst = adjust_address (dst, HImode, 0);
12043 src = adjust_address (src, HImode, 0);
12044 reg0 = gen_reg_rtx (SImode);
12045 if (src_aligned)
12046 emit_insn (gen_zero_extendhisi2 (reg0, src));
12047 else
12048 emit_insn (gen_unaligned_loadhiu (reg0, src));
12050 if (dst_aligned)
12051 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
12052 else
12053 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
12055 src = next_consecutive_mem (src);
12056 dst = next_consecutive_mem (dst);
12057 if (len == 2)
12058 return true;
12061 dst = adjust_address (dst, QImode, 0);
12062 src = adjust_address (src, QImode, 0);
12063 reg0 = gen_reg_rtx (QImode);
12064 emit_move_insn (reg0, src);
12065 emit_move_insn (dst, reg0);
12066 return true;
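/* Editorial example of what the routine above generates for a 14-byte copy
   with word-aligned source and destination (register names illustrative):

       ldrd r0, r1, [src]       @ bytes 0-7
       strd r0, r1, [dst]
       ldr  r0, [src, #8]       @ bytes 8-11
       str  r0, [dst, #8]
       ldrh r0, [src, #12]      @ bytes 12-13
       strh r0, [dst, #12]

   Misaligned operands use the unaligned load/store patterns instead, and
   when neither operand is word-aligned the routine defers to
   arm_gen_movmemqi.  */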
12069 /* Select a dominance comparison mode if possible for a test of the general
12070 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
12071 COND_OR == DOM_CC_X_AND_Y => (X && Y)
12072 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
12073 COND_OR == DOM_CC_X_OR_Y => (X || Y)
12074 In all cases OP will be either EQ or NE, but we don't need to know which
12075 here. If we are unable to support a dominance comparison we return
12076 CC mode. This will then fail to match for the RTL expressions that
12077 generate this call. */
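/* Editorial example: for "x == 0 || y <= 0" the two comparisons (EQ and LE)
   arrive here with COND_OR == DOM_CC_X_OR_Y; an EQ result also satisfies
   LE, so the pair can be combined and CC_DLEmode is returned.  For
   "x == 0 || y < 0" (EQ and LT) neither condition implies the other, so
   CCmode is returned and the combining pattern fails to match.  */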
12078 enum machine_mode
12079 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
12081 enum rtx_code cond1, cond2;
12082 int swapped = 0;
12084 /* Currently we will probably get the wrong result if the individual
12085 comparisons are not simple. This also ensures that it is safe to
12086 reverse a comparison if necessary. */
12087 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
12088 != CCmode)
12089 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
12090 != CCmode))
12091 return CCmode;
12093 /* The if_then_else variant of this tests the second condition if the
12094 first passes, but is true if the first fails. Reverse the first
12095 condition to get a true "inclusive-or" expression. */
12096 if (cond_or == DOM_CC_NX_OR_Y)
12097 cond1 = reverse_condition (cond1);
12099 /* If the comparisons are not equal, and one doesn't dominate the other,
12100 then we can't do this. */
12101 if (cond1 != cond2
12102 && !comparison_dominates_p (cond1, cond2)
12103 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
12104 return CCmode;
12106 if (swapped)
12108 enum rtx_code temp = cond1;
12109 cond1 = cond2;
12110 cond2 = temp;
12113 switch (cond1)
12115 case EQ:
12116 if (cond_or == DOM_CC_X_AND_Y)
12117 return CC_DEQmode;
12119 switch (cond2)
12121 case EQ: return CC_DEQmode;
12122 case LE: return CC_DLEmode;
12123 case LEU: return CC_DLEUmode;
12124 case GE: return CC_DGEmode;
12125 case GEU: return CC_DGEUmode;
12126 default: gcc_unreachable ();
12129 case LT:
12130 if (cond_or == DOM_CC_X_AND_Y)
12131 return CC_DLTmode;
12133 switch (cond2)
12135 case LT:
12136 return CC_DLTmode;
12137 case LE:
12138 return CC_DLEmode;
12139 case NE:
12140 return CC_DNEmode;
12141 default:
12142 gcc_unreachable ();
12145 case GT:
12146 if (cond_or == DOM_CC_X_AND_Y)
12147 return CC_DGTmode;
12149 switch (cond2)
12151 case GT:
12152 return CC_DGTmode;
12153 case GE:
12154 return CC_DGEmode;
12155 case NE:
12156 return CC_DNEmode;
12157 default:
12158 gcc_unreachable ();
12161 case LTU:
12162 if (cond_or == DOM_CC_X_AND_Y)
12163 return CC_DLTUmode;
12165 switch (cond2)
12167 case LTU:
12168 return CC_DLTUmode;
12169 case LEU:
12170 return CC_DLEUmode;
12171 case NE:
12172 return CC_DNEmode;
12173 default:
12174 gcc_unreachable ();
12177 case GTU:
12178 if (cond_or == DOM_CC_X_AND_Y)
12179 return CC_DGTUmode;
12181 switch (cond2)
12183 case GTU:
12184 return CC_DGTUmode;
12185 case GEU:
12186 return CC_DGEUmode;
12187 case NE:
12188 return CC_DNEmode;
12189 default:
12190 gcc_unreachable ();
12193 /* The remaining cases only occur when both comparisons are the
12194 same. */
12195 case NE:
12196 gcc_assert (cond1 == cond2);
12197 return CC_DNEmode;
12199 case LE:
12200 gcc_assert (cond1 == cond2);
12201 return CC_DLEmode;
12203 case GE:
12204 gcc_assert (cond1 == cond2);
12205 return CC_DGEmode;
12207 case LEU:
12208 gcc_assert (cond1 == cond2);
12209 return CC_DLEUmode;
12211 case GEU:
12212 gcc_assert (cond1 == cond2);
12213 return CC_DGEUmode;
12215 default:
12216 gcc_unreachable ();
12220 enum machine_mode
12221 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
12223 /* All floating point compares return CCFP if it is an equality
12224 comparison, and CCFPE otherwise. */
12225 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12227 switch (op)
12229 case EQ:
12230 case NE:
12231 case UNORDERED:
12232 case ORDERED:
12233 case UNLT:
12234 case UNLE:
12235 case UNGT:
12236 case UNGE:
12237 case UNEQ:
12238 case LTGT:
12239 return CCFPmode;
12241 case LT:
12242 case LE:
12243 case GT:
12244 case GE:
12245 return CCFPEmode;
12247 default:
12248 gcc_unreachable ();
12252 /* A compare with a shifted operand. Because of canonicalization, the
12253 comparison will have to be swapped when we emit the assembler. */
12254 if (GET_MODE (y) == SImode
12255 && (REG_P (y) || (GET_CODE (y) == SUBREG))
12256 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
12257 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
12258 || GET_CODE (x) == ROTATERT))
12259 return CC_SWPmode;
12261 /* This operation is performed swapped, but since we only rely on the Z
12262 flag we don't need an additional mode. */
12263 if (GET_MODE (y) == SImode
12264 && (REG_P (y) || (GET_CODE (y) == SUBREG))
12265 && GET_CODE (x) == NEG
12266 && (op == EQ || op == NE))
12267 return CC_Zmode;
12269 /* This is a special case that is used by combine to allow a
12270 comparison of a shifted byte load to be split into a zero-extend
12271 followed by a comparison of the shifted integer (only valid for
12272 equalities and unsigned inequalities). */
12273 if (GET_MODE (x) == SImode
12274 && GET_CODE (x) == ASHIFT
12275 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
12276 && GET_CODE (XEXP (x, 0)) == SUBREG
12277 && MEM_P (SUBREG_REG (XEXP (x, 0)))
12278 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
12279 && (op == EQ || op == NE
12280 || op == GEU || op == GTU || op == LTU || op == LEU)
12281 && CONST_INT_P (y))
12282 return CC_Zmode;
12284 /* A construct for a conditional compare, if the false arm contains
12285 0, then both conditions must be true, otherwise either condition
12286 must be true. Not all conditions are possible, so CCmode is
12287 returned if it can't be done. */
12288 if (GET_CODE (x) == IF_THEN_ELSE
12289 && (XEXP (x, 2) == const0_rtx
12290 || XEXP (x, 2) == const1_rtx)
12291 && COMPARISON_P (XEXP (x, 0))
12292 && COMPARISON_P (XEXP (x, 1)))
12293 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12294 INTVAL (XEXP (x, 2)));
12296 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
12297 if (GET_CODE (x) == AND
12298 && (op == EQ || op == NE)
12299 && COMPARISON_P (XEXP (x, 0))
12300 && COMPARISON_P (XEXP (x, 1)))
12301 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12302 DOM_CC_X_AND_Y);
12304 if (GET_CODE (x) == IOR
12305 && (op == EQ || op == NE)
12306 && COMPARISON_P (XEXP (x, 0))
12307 && COMPARISON_P (XEXP (x, 1)))
12308 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
12309 DOM_CC_X_OR_Y);
12311 /* An operation (on Thumb) where we want to test for a single bit.
12312 This is done by shifting that bit up into the top bit of a
12313 scratch register; we can then branch on the sign bit. */
12314 if (TARGET_THUMB1
12315 && GET_MODE (x) == SImode
12316 && (op == EQ || op == NE)
12317 && GET_CODE (x) == ZERO_EXTRACT
12318 && XEXP (x, 1) == const1_rtx)
12319 return CC_Nmode;
12321 /* For an operation that sets the condition codes as a side-effect, the
12322 V flag is not set correctly, so we can only use comparisons where
12323 this doesn't matter. (For LT and GE we can use "mi" and "pl"
12324 instead.) */
12325 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
12326 if (GET_MODE (x) == SImode
12327 && y == const0_rtx
12328 && (op == EQ || op == NE || op == LT || op == GE)
12329 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
12330 || GET_CODE (x) == AND || GET_CODE (x) == IOR
12331 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
12332 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
12333 || GET_CODE (x) == LSHIFTRT
12334 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
12335 || GET_CODE (x) == ROTATERT
12336 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
12337 return CC_NOOVmode;
12339 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
12340 return CC_Zmode;
12342 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
12343 && GET_CODE (x) == PLUS
12344 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
12345 return CC_Cmode;
12347 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
12349 switch (op)
12351 case EQ:
12352 case NE:
12353 /* A DImode comparison against zero can be implemented by
12354 or'ing the two halves together. */
12355 if (y == const0_rtx)
12356 return CC_Zmode;
12358 /* We can do an equality test in three Thumb instructions. */
12359 if (!TARGET_32BIT)
12360 return CC_Zmode;
12362 /* FALLTHROUGH */
12364 case LTU:
12365 case LEU:
12366 case GTU:
12367 case GEU:
12368 /* DImode unsigned comparisons can be implemented by cmp +
12369 cmpeq without a scratch register. Not worth doing in
12370 Thumb-2. */
12371 if (TARGET_32BIT)
12372 return CC_CZmode;
12374 /* FALLTHROUGH */
12376 case LT:
12377 case LE:
12378 case GT:
12379 case GE:
12380 /* DImode signed and unsigned comparisons can be implemented
12381 by cmp + sbcs with a scratch register, but that does not
12382 set the Z flag - we must reverse GT/LE/GTU/LEU. */
12383 gcc_assert (op != EQ && op != NE);
12384 return CC_NCVmode;
12386 default:
12387 gcc_unreachable ();
12391 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
12392 return GET_MODE (x);
12394 return CCmode;
12397 /* X and Y are two things to compare using CODE.  Emit the compare insn and
12398 return the rtx for the CC register in the proper mode.  SCRATCH is a
12399 scratch register that DImode comparisons may need once reload has completed. */
12401 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
12403 enum machine_mode mode;
12404 rtx cc_reg;
12405 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
12407 /* We might have X as a constant, Y as a register because of the predicates
12408 used for cmpdi. If so, force X to a register here. */
12409 if (dimode_comparison && !REG_P (x))
12410 x = force_reg (DImode, x);
12412 mode = SELECT_CC_MODE (code, x, y);
12413 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
12415 if (dimode_comparison
12416 && mode != CC_CZmode)
12418 rtx clobber, set;
12420 /* To compare two non-zero values for equality, XOR them and
12421 then compare against zero. Not used for ARM mode; there
12422 CC_CZmode is cheaper. */
12423 if (mode == CC_Zmode && y != const0_rtx)
12425 gcc_assert (!reload_completed);
12426 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
12427 y = const0_rtx;
12430 /* A scratch register is required. */
12431 if (reload_completed)
12432 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
12433 else
12434 scratch = gen_rtx_SCRATCH (SImode);
12436 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12437 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
12438 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12440 else
12441 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
12443 return cc_reg;
12446 /* Generate a sequence of insns that will generate the correct return
12447 address mask depending on the physical architecture that the program
12448 is running on. */
12450 arm_gen_return_addr_mask (void)
12452 rtx reg = gen_reg_rtx (Pmode);
12454 emit_insn (gen_return_addr_mask (reg));
12455 return reg;
12458 void
12459 arm_reload_in_hi (rtx *operands)
12461 rtx ref = operands[1];
12462 rtx base, scratch;
12463 HOST_WIDE_INT offset = 0;
12465 if (GET_CODE (ref) == SUBREG)
12467 offset = SUBREG_BYTE (ref);
12468 ref = SUBREG_REG (ref);
12471 if (REG_P (ref))
12473 /* We have a pseudo which has been spilt onto the stack; there
12474 are two cases here: the first where there is a simple
12475 stack-slot replacement and a second where the stack-slot is
12476 out of range, or is used as a subreg. */
12477 if (reg_equiv_mem (REGNO (ref)))
12479 ref = reg_equiv_mem (REGNO (ref));
12480 base = find_replacement (&XEXP (ref, 0));
12482 else
12483 /* The slot is out of range, or was dressed up in a SUBREG. */
12484 base = reg_equiv_address (REGNO (ref));
12486 else
12487 base = find_replacement (&XEXP (ref, 0));
12489 /* Handle the case where the address is too complex to be offset by 1. */
12490 if (GET_CODE (base) == MINUS
12491 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12493 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12495 emit_set_insn (base_plus, base);
12496 base = base_plus;
12498 else if (GET_CODE (base) == PLUS)
12500 /* The addend must be CONST_INT, or we would have dealt with it above. */
12501 HOST_WIDE_INT hi, lo;
12503 offset += INTVAL (XEXP (base, 1));
12504 base = XEXP (base, 0);
12506 /* Rework the address into a legal sequence of insns. */
12507 /* Valid range for lo is -4095 -> 4095 */
12508 lo = (offset >= 0
12509 ? (offset & 0xfff)
12510 : -((-offset) & 0xfff));
12512 /* Corner case, if lo is the max offset then we would be out of range
12513 once we have added the additional 1 below, so bump the msb into the
12514 pre-loading insn(s). */
12515 if (lo == 4095)
12516 lo &= 0x7ff;
12518 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12519 ^ (HOST_WIDE_INT) 0x80000000)
12520 - (HOST_WIDE_INT) 0x80000000);
12522 gcc_assert (hi + lo == offset);
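/* Editorial worked example of the hi/lo split above: for offset 0x1234,
   lo = 0x1234 & 0xfff = 0x234 and hi = 0x1000, so hi + lo == offset and
   (base + lo) is within the +/-4095 byte-load range.  For offset 4095 the
   corner case applies: lo is clipped to 0x7ff so that lo + 1, needed for
   the second byte below, is still in range, and the rest moves into hi.  */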
12524 if (hi != 0)
12526 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12528 /* Get the base address; addsi3 knows how to handle constants
12529 that require more than one insn. */
12530 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12531 base = base_plus;
12532 offset = lo;
12536 /* Operands[2] may overlap operands[0] (though it won't overlap
12537 operands[1]), that's why we asked for a DImode reg -- so we can
12538 use the half that does not overlap. */
12539 if (REGNO (operands[2]) == REGNO (operands[0]))
12540 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12541 else
12542 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12544 emit_insn (gen_zero_extendqisi2 (scratch,
12545 gen_rtx_MEM (QImode,
12546 plus_constant (Pmode, base,
12547 offset))));
12548 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
12549 gen_rtx_MEM (QImode,
12550 plus_constant (Pmode, base,
12551 offset + 1))));
12552 if (!BYTES_BIG_ENDIAN)
12553 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12554 gen_rtx_IOR (SImode,
12555 gen_rtx_ASHIFT
12556 (SImode,
12557 gen_rtx_SUBREG (SImode, operands[0], 0),
12558 GEN_INT (8)),
12559 scratch));
12560 else
12561 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
12562 gen_rtx_IOR (SImode,
12563 gen_rtx_ASHIFT (SImode, scratch,
12564 GEN_INT (8)),
12565 gen_rtx_SUBREG (SImode, operands[0], 0)));
12568 /* Handle storing a half-word to memory during reload by synthesizing as two
12569 byte stores. Take care not to clobber the input values until after we
12570 have moved them somewhere safe. This code assumes that if the DImode
12571 scratch in operands[2] overlaps either the input value or output address
12572 in some way, then that value must die in this insn (we absolutely need
12573 two scratch registers for some corner cases). */
12574 void
12575 arm_reload_out_hi (rtx *operands)
12577 rtx ref = operands[0];
12578 rtx outval = operands[1];
12579 rtx base, scratch;
12580 HOST_WIDE_INT offset = 0;
12582 if (GET_CODE (ref) == SUBREG)
12584 offset = SUBREG_BYTE (ref);
12585 ref = SUBREG_REG (ref);
12588 if (REG_P (ref))
12590 /* We have a pseudo which has been spilt onto the stack; there
12591 are two cases here: the first where there is a simple
12592 stack-slot replacement and a second where the stack-slot is
12593 out of range, or is used as a subreg. */
12594 if (reg_equiv_mem (REGNO (ref)))
12596 ref = reg_equiv_mem (REGNO (ref));
12597 base = find_replacement (&XEXP (ref, 0));
12599 else
12600 /* The slot is out of range, or was dressed up in a SUBREG. */
12601 base = reg_equiv_address (REGNO (ref));
12603 else
12604 base = find_replacement (&XEXP (ref, 0));
12606 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12608 /* Handle the case where the address is too complex to be offset by 1. */
12609 if (GET_CODE (base) == MINUS
12610 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12612 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12614 /* Be careful not to destroy OUTVAL. */
12615 if (reg_overlap_mentioned_p (base_plus, outval))
12617 /* Updating base_plus might destroy outval, see if we can
12618 swap the scratch and base_plus. */
12619 if (!reg_overlap_mentioned_p (scratch, outval))
12621 rtx tmp = scratch;
12622 scratch = base_plus;
12623 base_plus = tmp;
12625 else
12627 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12629 /* Be conservative and copy OUTVAL into the scratch now,
12630 this should only be necessary if outval is a subreg
12631 of something larger than a word. */
12632 /* XXX Might this clobber base? I can't see how it can,
12633 since scratch is known to overlap with OUTVAL, and
12634 must be wider than a word. */
12635 emit_insn (gen_movhi (scratch_hi, outval));
12636 outval = scratch_hi;
12640 emit_set_insn (base_plus, base);
12641 base = base_plus;
12643 else if (GET_CODE (base) == PLUS)
12645 /* The addend must be CONST_INT, or we would have dealt with it above. */
12646 HOST_WIDE_INT hi, lo;
12648 offset += INTVAL (XEXP (base, 1));
12649 base = XEXP (base, 0);
12651 /* Rework the address into a legal sequence of insns. */
12652 /* Valid range for lo is -4095 -> 4095 */
12653 lo = (offset >= 0
12654 ? (offset & 0xfff)
12655 : -((-offset) & 0xfff));
12657 /* Corner case, if lo is the max offset then we would be out of range
12658 once we have added the additional 1 below, so bump the msb into the
12659 pre-loading insn(s). */
12660 if (lo == 4095)
12661 lo &= 0x7ff;
12663 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12664 ^ (HOST_WIDE_INT) 0x80000000)
12665 - (HOST_WIDE_INT) 0x80000000);
12667 gcc_assert (hi + lo == offset);
12669 if (hi != 0)
12671 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12673 /* Be careful not to destroy OUTVAL. */
12674 if (reg_overlap_mentioned_p (base_plus, outval))
12676 /* Updating base_plus might destroy outval, see if we
12677 can swap the scratch and base_plus. */
12678 if (!reg_overlap_mentioned_p (scratch, outval))
12680 rtx tmp = scratch;
12681 scratch = base_plus;
12682 base_plus = tmp;
12684 else
12686 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12688 /* Be conservative and copy outval into scratch now,
12689 this should only be necessary if outval is a
12690 subreg of something larger than a word. */
12691 /* XXX Might this clobber base? I can't see how it
12692 can, since scratch is known to overlap with
12693 outval. */
12694 emit_insn (gen_movhi (scratch_hi, outval));
12695 outval = scratch_hi;
12699 /* Get the base address; addsi3 knows how to handle constants
12700 that require more than one insn. */
12701 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12702 base = base_plus;
12703 offset = lo;
12707 if (BYTES_BIG_ENDIAN)
12709 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12710 plus_constant (Pmode, base,
12711 offset + 1)),
12712 gen_lowpart (QImode, outval)));
12713 emit_insn (gen_lshrsi3 (scratch,
12714 gen_rtx_SUBREG (SImode, outval, 0),
12715 GEN_INT (8)));
12716 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12717 offset)),
12718 gen_lowpart (QImode, scratch)));
12720 else
12722 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12723 offset)),
12724 gen_lowpart (QImode, outval)));
12725 emit_insn (gen_lshrsi3 (scratch,
12726 gen_rtx_SUBREG (SImode, outval, 0),
12727 GEN_INT (8)));
12728 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12729 plus_constant (Pmode, base,
12730 offset + 1)),
12731 gen_lowpart (QImode, scratch)));
12735 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12736 (padded to the size of a word) should be passed in a register. */
12738 static bool
12739 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12741 if (TARGET_AAPCS_BASED)
12742 return must_pass_in_stack_var_size (mode, type);
12743 else
12744 return must_pass_in_stack_var_size_or_pad (mode, type);
12748 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12749 Return true if an argument passed on the stack should be padded upwards,
12750 i.e. if the least-significant byte has useful data.
12751 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12752 aggregate types are placed in the lowest memory address. */
12754 bool
12755 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12757 if (!TARGET_AAPCS_BASED)
12758 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12760 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12761 return false;
12763 return true;
12767 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12768 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12769 register has useful data, and return the opposite if the most
12770 significant byte does. */
12772 bool
12773 arm_pad_reg_upward (enum machine_mode mode,
12774 tree type, int first ATTRIBUTE_UNUSED)
12776 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12778 /* For AAPCS, small aggregates, small fixed-point types,
12779 and small complex types are always padded upwards. */
12780 if (type)
12782 if ((AGGREGATE_TYPE_P (type)
12783 || TREE_CODE (type) == COMPLEX_TYPE
12784 || FIXED_POINT_TYPE_P (type))
12785 && int_size_in_bytes (type) <= 4)
12786 return true;
12788 else
12790 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12791 && GET_MODE_SIZE (mode) <= 4)
12792 return true;
12796 /* Otherwise, use default padding. */
12797 return !BYTES_BIG_ENDIAN;
12800 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
12801 assuming that the address in the base register is word aligned. */
12802 bool
12803 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
12805 HOST_WIDE_INT max_offset;
12807 /* Offset must be a multiple of 4 in Thumb-2 mode. */
12808 if (TARGET_THUMB2 && ((offset & 3) != 0))
12809 return false;
12811 if (TARGET_THUMB2)
12812 max_offset = 1020;
12813 else if (TARGET_ARM)
12814 max_offset = 255;
12815 else
12816 return false;
12818 return ((offset <= max_offset) && (offset >= -max_offset));
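/* For example (editorial note): multiples of 4 in [-1020, 1020] are
   accepted in Thumb-2 state, ARM state accepts any offset in [-255, 255],
   and Thumb-1 targets always get false.  */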
12821 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
12822 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
12823 Assumes that the address in the base register RN is word aligned. Pattern
12824 guarantees that both memory accesses use the same base register,
12825 the offsets are constants within the range, and the gap between the offsets is 4.
12826 If reload has completed then check that the registers are legal. WBACK indicates whether
12827 address is updated. LOAD indicates whether memory access is load or store. */
12828 bool
12829 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
12830 bool wback, bool load)
12832 unsigned int t, t2, n;
12834 if (!reload_completed)
12835 return true;
12837 if (!offset_ok_for_ldrd_strd (offset))
12838 return false;
12840 t = REGNO (rt);
12841 t2 = REGNO (rt2);
12842 n = REGNO (rn);
12844 if ((TARGET_THUMB2)
12845 && ((wback && (n == t || n == t2))
12846 || (t == SP_REGNUM)
12847 || (t == PC_REGNUM)
12848 || (t2 == SP_REGNUM)
12849 || (t2 == PC_REGNUM)
12850 || (!load && (n == PC_REGNUM))
12851 || (load && (t == t2))
12852 /* Triggers Cortex-M3 LDRD errata. */
12853 || (!wback && load && fix_cm3_ldrd && (n == t))))
12854 return false;
12856 if ((TARGET_ARM)
12857 && ((wback && (n == t || n == t2))
12858 || (t2 == PC_REGNUM)
12859 || (t % 2 != 0) /* First destination register is not even. */
12860 || (t2 != t + 1)
12861 /* PC can be used as base register (for offset addressing only),
12862 but it is deprecated. */
12863 || (n == PC_REGNUM)))
12864 return false;
12866 return true;
12869 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
12870 operand ADDR is an immediate offset from the base register and is
12871 not volatile, in which case it sets BASE and OFFSET
12872 accordingly. */
12873 bool
12874 mem_ok_for_ldrd_strd (rtx addr, rtx *base, rtx *offset)
12876 /* TODO: Handle more general memory operand patterns, such as
12877 PRE_DEC and PRE_INC. */
12879 /* Convert a subreg of mem into mem itself. */
12880 if (GET_CODE (addr) == SUBREG)
12881 addr = alter_subreg (&addr, true);
12883 gcc_assert (MEM_P (addr));
12885 /* Don't modify volatile memory accesses. */
12886 if (MEM_VOLATILE_P (addr))
12887 return false;
12889 *offset = const0_rtx;
12891 addr = XEXP (addr, 0);
12892 if (REG_P (addr))
12894 *base = addr;
12895 return true;
12897 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
12899 *base = XEXP (addr, 0);
12900 *offset = XEXP (addr, 1);
12901 return (REG_P (*base) && CONST_INT_P (*offset));
12904 return false;
12907 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
12909 /* Called from a peephole2 to replace two word-size accesses with a
12910 single LDRD/STRD instruction. Returns true iff we can generate a
12911 new instruction sequence. That is, both accesses use the same base
12912 register and the gap between constant offsets is 4. This function
12913 may reorder its operands to match ldrd/strd RTL templates.
12914 OPERANDS are the operands found by the peephole matcher;
12915 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
12916 corresponding memory operands. LOAD indicates whether the access
12917 is a load or a store. CONST_STORE indicates a store of constant
12918 integer values held in OPERANDS[4,5] and assumes that the pattern
12919 is four insns long, for the purpose of checking dead registers.
12920 COMMUTE indicates that register operands may be reordered. */
12921 bool
12922 gen_operands_ldrd_strd (rtx *operands, bool load,
12923 bool const_store, bool commute)
12925 int nops = 2;
12926 HOST_WIDE_INT offsets[2], offset;
12927 rtx base = NULL_RTX;
12928 rtx cur_base, cur_offset, tmp;
12929 int i, gap;
12930 HARD_REG_SET regset;
12932 gcc_assert (!const_store || !load);
12933 /* Check that the memory references are immediate offsets from the
12934 same base register. Extract the base register, the destination
12935 registers, and the corresponding memory offsets. */
12936 for (i = 0; i < nops; i++)
12938 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
12939 return false;
12941 if (i == 0)
12942 base = cur_base;
12943 else if (REGNO (base) != REGNO (cur_base))
12944 return false;
12946 offsets[i] = INTVAL (cur_offset);
12947 if (GET_CODE (operands[i]) == SUBREG)
12949 tmp = SUBREG_REG (operands[i]);
12950 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
12951 operands[i] = tmp;
12955 /* Make sure there is no dependency between the individual loads. */
12956 if (load && REGNO (operands[0]) == REGNO (base))
12957 return false; /* RAW */
12959 if (load && REGNO (operands[0]) == REGNO (operands[1]))
12960 return false; /* WAW */
12962 /* If the same input register is used in both stores
12963 when storing different constants, try to find a free register.
12964 For example, the code
12965 mov r0, 0
12966 str r0, [r2]
12967 mov r0, 1
12968 str r0, [r2, #4]
12969 can be transformed into
12970 mov r1, 0
12971 strd r1, r0, [r2]
12972 in Thumb mode assuming that r1 is free. */
12973 if (const_store
12974 && REGNO (operands[0]) == REGNO (operands[1])
12975 && INTVAL (operands[4]) != INTVAL (operands[5]))
12977 if (TARGET_THUMB2)
12979 CLEAR_HARD_REG_SET (regset);
12980 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
12981 if (tmp == NULL_RTX)
12982 return false;
12984 /* Use the new register in the first load to ensure that
12985 if the original input register is not dead after peephole,
12986 then it will have the correct constant value. */
12987 operands[0] = tmp;
12989 else if (TARGET_ARM)
12991 return false;
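/* Note: because of the early return above, the remainder of this
   TARGET_ARM branch is currently unreachable.  */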
12992 int regno = REGNO (operands[0]);
12993 if (!peep2_reg_dead_p (4, operands[0]))
12995 /* When the input register is even and is not dead after the
12996 pattern, it has to hold the second constant but we cannot
12997 form a legal STRD in ARM mode with this register as the second
12998 register. */
12999 if (regno % 2 == 0)
13000 return false;
13002 /* Is regno-1 free? */
13003 SET_HARD_REG_SET (regset);
13004 CLEAR_HARD_REG_BIT(regset, regno - 1);
13005 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
13006 if (tmp == NULL_RTX)
13007 return false;
13009 operands[0] = tmp;
13011 else
13013 /* Find a DImode register. */
13014 CLEAR_HARD_REG_SET (regset);
13015 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
13016 if (tmp != NULL_RTX)
13018 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
13019 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
13021 else
13023 /* Can we use the input register to form a DI register? */
13024 SET_HARD_REG_SET (regset);
13025 CLEAR_HARD_REG_BIT(regset,
13026 regno % 2 == 0 ? regno + 1 : regno - 1);
13027 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
13028 if (tmp == NULL_RTX)
13029 return false;
13030 operands[regno % 2 == 1 ? 0 : 1] = tmp;
13034 gcc_assert (operands[0] != NULL_RTX);
13035 gcc_assert (operands[1] != NULL_RTX);
13036 gcc_assert (REGNO (operands[0]) % 2 == 0);
13037 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
13041 /* Make sure the instructions are ordered with lower memory access first. */
13042 if (offsets[0] > offsets[1])
13044 gap = offsets[0] - offsets[1];
13045 offset = offsets[1];
13047 /* Swap the instructions such that lower memory is accessed first. */
13048 SWAP_RTX (operands[0], operands[1]);
13049 SWAP_RTX (operands[2], operands[3]);
13050 if (const_store)
13051 SWAP_RTX (operands[4], operands[5]);
13053 else
13055 gap = offsets[1] - offsets[0];
13056 offset = offsets[0];
13059 /* Make sure accesses are to consecutive memory locations. */
13060 if (gap != 4)
13061 return false;
13063 /* Make sure we generate legal instructions. */
13064 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
13065 false, load))
13066 return true;
13068 /* In Thumb state, where LDRD/STRD place almost no constraints on the
13069 registers, there is little hope of fixing it by renaming. */
13070 if (TARGET_THUMB2)
13071 return false;
13073 if (load && commute)
13075 /* Try reordering registers. */
13076 SWAP_RTX (operands[0], operands[1]);
13077 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
13078 false, load))
13079 return true;
13082 if (const_store)
13084 /* If input registers are dead after this pattern, they can be
13085 reordered or replaced by other registers that are free in the
13086 current pattern. */
13087 if (!peep2_reg_dead_p (4, operands[0])
13088 || !peep2_reg_dead_p (4, operands[1]))
13089 return false;
13091 /* Try to reorder the input registers. */
13092 /* For example, the code
13093 mov r0, 0
13094 mov r1, 1
13095 str r1, [r2]
13096 str r0, [r2, #4]
13097 can be transformed into
13098 mov r1, 0
13099 mov r0, 1
13100 strd r0, r1, [r2]
13102 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
13103 false, false))
13105 SWAP_RTX (operands[0], operands[1]);
13106 return true;
13109 /* Try to find a free DI register. */
13110 CLEAR_HARD_REG_SET (regset);
13111 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
13112 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
13113 while (true)
13115 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
13116 if (tmp == NULL_RTX)
13117 return false;
13119 /* DREG must be an even-numbered register in DImode.
13120 Split it into SI registers. */
13121 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
13122 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
13123 gcc_assert (operands[0] != NULL_RTX);
13124 gcc_assert (operands[1] != NULL_RTX);
13125 gcc_assert (REGNO (operands[0]) % 2 == 0);
13126 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
13128 return (operands_ok_ldrd_strd (operands[0], operands[1],
13129 base, offset,
13130 false, load));
13134 return false;
13136 #undef SWAP_RTX
13141 /* Print a symbolic form of X to the debug file, F. */
13142 static void
13143 arm_print_value (FILE *f, rtx x)
13145 switch (GET_CODE (x))
13147 case CONST_INT:
13148 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
13149 return;
13151 case CONST_DOUBLE:
13152 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
13153 return;
13155 case CONST_VECTOR:
13157 int i;
13159 fprintf (f, "<");
13160 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
13162 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
13163 if (i < (CONST_VECTOR_NUNITS (x) - 1))
13164 fputc (',', f);
13166 fprintf (f, ">");
13168 return;
13170 case CONST_STRING:
13171 fprintf (f, "\"%s\"", XSTR (x, 0));
13172 return;
13174 case SYMBOL_REF:
13175 fprintf (f, "`%s'", XSTR (x, 0));
13176 return;
13178 case LABEL_REF:
13179 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
13180 return;
13182 case CONST:
13183 arm_print_value (f, XEXP (x, 0));
13184 return;
13186 case PLUS:
13187 arm_print_value (f, XEXP (x, 0));
13188 fprintf (f, "+");
13189 arm_print_value (f, XEXP (x, 1));
13190 return;
13192 case PC:
13193 fprintf (f, "pc");
13194 return;
13196 default:
13197 fprintf (f, "????");
13198 return;
13202 /* Routines for manipulation of the constant pool. */
13204 /* Arm instructions cannot load a large constant directly into a
13205 register; they have to come from a pc relative load. The constant
13206 must therefore be placed in the addressable range of the pc
13207 relative load. Depending on the precise pc relative load
13208 instruction the range is somewhere between 256 bytes and 4k. This
13209 means that we often have to dump a constant inside a function, and
13210 generate code to branch around it.
13212 It is important to minimize this, since the branches will slow
13213 things down and make the code larger.
13215 Normally we can hide the table after an existing unconditional
13216 branch so that there is no interruption of the flow, but in the
13217 worst case the code looks like this:
13219 ldr rn, L1
13221 b L2
13222 align
13223 L1: .long value
13227 ldr rn, L3
13229 b L4
13230 align
13231 L3: .long value
13235 We fix this by performing a scan after scheduling, which notices
13236 which instructions need to have their operands fetched from the
13237 constant table and builds the table.
13239 The algorithm starts by building a table of all the constants that
13240 need fixing up and all the natural barriers in the function (places
13241 where a constant table can be dropped without breaking the flow).
13242 For each fixup we note how far the pc-relative replacement will be
13243 able to reach and the offset of the instruction into the function.
13245 Having built the table we then group the fixes together to form
13246 tables that are as large as possible (subject to addressing
13247 constraints) and emit each table of constants after the last
13248 barrier that is within range of all the instructions in the group.
13249 If a group does not contain a barrier, then we forcibly create one
13250 by inserting a jump instruction into the flow. Once the table has
13251 been inserted, the insns are then modified to reference the
13252 relevant entry in the pool.
13254 Possible enhancements to the algorithm (not implemented) are:
13256 1) For some processors and object formats, there may be benefit in
13257 aligning the pools to the start of cache lines; this alignment
13258 would need to be taken into account when calculating addressability
13259 of a pool. */
13261 /* These typedefs are located at the start of this file, so that
13262 they can be used in the prototypes there. This comment is to
13263 remind readers of that fact so that the following structures
13264 can be understood more easily.
13266 typedef struct minipool_node Mnode;
13267 typedef struct minipool_fixup Mfix; */
13269 struct minipool_node
13271 /* Doubly linked chain of entries. */
13272 Mnode * next;
13273 Mnode * prev;
13274 /* The maximum offset into the code at which this entry can be placed. While
13275 pushing fixes for forward references, all entries are sorted in order
13276 of increasing max_address. */
13277 HOST_WIDE_INT max_address;
13278 /* Similarly for an entry inserted for a backwards ref. */
13279 HOST_WIDE_INT min_address;
13280 /* The number of fixes referencing this entry. This can become zero
13281 if we "unpush" an entry. In this case we ignore the entry when we
13282 come to emit the code. */
13283 int refcount;
13284 /* The offset from the start of the minipool. */
13285 HOST_WIDE_INT offset;
13286 /* The value in the table. */
13287 rtx value;
13288 /* The mode of value. */
13289 enum machine_mode mode;
13290 /* The size of the value. With iWMMXt enabled
13291 sizes > 4 also imply an alignment of 8-bytes. */
13292 int fix_size;
13295 struct minipool_fixup
13297 Mfix * next;
13298 rtx insn;
13299 HOST_WIDE_INT address;
13300 rtx * loc;
13301 enum machine_mode mode;
13302 int fix_size;
13303 rtx value;
13304 Mnode * minipool;
13305 HOST_WIDE_INT forwards;
13306 HOST_WIDE_INT backwards;
13309 /* Fixes less than a word need padding out to a word boundary. */
13310 #define MINIPOOL_FIX_SIZE(mode) \
13311 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
13313 static Mnode * minipool_vector_head;
13314 static Mnode * minipool_vector_tail;
13315 static rtx minipool_vector_label;
13316 static int minipool_pad;
13318 /* The linked list of all minipool fixes required for this function. */
13319 Mfix * minipool_fix_head;
13320 Mfix * minipool_fix_tail;
13321 /* The fix entry for the current minipool, once it has been placed. */
13322 Mfix * minipool_barrier;
13324 /* Determines if INSN is the start of a jump table. Returns the end
13325 of the TABLE or NULL_RTX. */
13326 static rtx
13327 is_jump_table (rtx insn)
13329 rtx table;
13331 if (jump_to_label_p (insn)
13332 && ((table = next_real_insn (JUMP_LABEL (insn)))
13333 == next_real_insn (insn))
13334 && table != NULL
13335 && JUMP_P (table)
13336 && (GET_CODE (PATTERN (table)) == ADDR_VEC
13337 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
13338 return table;
13340 return NULL_RTX;
13343 #ifndef JUMP_TABLES_IN_TEXT_SECTION
13344 #define JUMP_TABLES_IN_TEXT_SECTION 0
13345 #endif
13347 static HOST_WIDE_INT
13348 get_jump_table_size (rtx insn)
13350 /* ADDR_VECs only take room if read-only data goes into the text
13351 section. */
13352 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
13354 rtx body = PATTERN (insn);
13355 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
13356 HOST_WIDE_INT size;
13357 HOST_WIDE_INT modesize;
13359 modesize = GET_MODE_SIZE (GET_MODE (body));
13360 size = modesize * XVECLEN (body, elt);
13361 switch (modesize)
13363 case 1:
13364 /* Round up size of TBB table to a halfword boundary. */
13365 size = (size + 1) & ~(HOST_WIDE_INT)1;
13366 break;
13367 case 2:
13368 /* No padding necessary for TBH. */
13369 break;
13370 case 4:
13371 /* Add two bytes for alignment on Thumb. */
13372 if (TARGET_THUMB)
13373 size += 2;
13374 break;
13375 default:
13376 gcc_unreachable ();
13378 return size;
13381 return 0;
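/* Worked example (sizes invented): a TBB table with 5 one-byte entries
   reports 6 bytes after rounding up to a halfword boundary, while a
   Thumb ADDR_VEC with 4-byte entries gets 2 extra bytes to allow for
   word-aligning the table itself.  */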
13384 /* Return the maximum amount of padding that will be inserted before
13385 label LABEL. */
13387 static HOST_WIDE_INT
13388 get_label_padding (rtx label)
13390 HOST_WIDE_INT align, min_insn_size;
13392 align = 1 << label_to_alignment (label);
13393 min_insn_size = TARGET_THUMB ? 2 : 4;
13394 return align > min_insn_size ? align - min_insn_size : 0;
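/* For instance, a label aligned to 8 bytes needs at most 8 - 2 == 6
   padding bytes on Thumb and 8 - 4 == 4 on ARM: addresses only advance
   in multiples of the minimum instruction size, so the padding can
   never reach a full alignment unit.  */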
13397 /* Move a minipool fix MP from its current location to before MAX_MP.
13398 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
13399 constraints may need updating. */
13400 static Mnode *
13401 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
13402 HOST_WIDE_INT max_address)
13404 /* The code below assumes these are different. */
13405 gcc_assert (mp != max_mp);
13407 if (max_mp == NULL)
13409 if (max_address < mp->max_address)
13410 mp->max_address = max_address;
13412 else
13414 if (max_address > max_mp->max_address - mp->fix_size)
13415 mp->max_address = max_mp->max_address - mp->fix_size;
13416 else
13417 mp->max_address = max_address;
13419 /* Unlink MP from its current position. Since max_mp is non-null,
13420 mp->prev must be non-null. */
13421 mp->prev->next = mp->next;
13422 if (mp->next != NULL)
13423 mp->next->prev = mp->prev;
13424 else
13425 minipool_vector_tail = mp->prev;
13427 /* Re-insert it before MAX_MP. */
13428 mp->next = max_mp;
13429 mp->prev = max_mp->prev;
13430 max_mp->prev = mp;
13432 if (mp->prev != NULL)
13433 mp->prev->next = mp;
13434 else
13435 minipool_vector_head = mp;
13438 /* Save the new entry. */
13439 max_mp = mp;
13441 /* Scan over the preceding entries and adjust their addresses as
13442 required. */
13443 while (mp->prev != NULL
13444 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
13446 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
13447 mp = mp->prev;
13450 return max_mp;
13453 /* Add a constant to the minipool for a forward reference. Returns the
13454 node added or NULL if the constant will not fit in this pool. */
13455 static Mnode *
13456 add_minipool_forward_ref (Mfix *fix)
13458 /* If set, max_mp is the first pool_entry that has a lower
13459 constraint than the one we are trying to add. */
13460 Mnode * max_mp = NULL;
13461 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
13462 Mnode * mp;
13464 /* If the minipool starts before the end of FIX->INSN then this FIX
13465 cannot be placed into the current pool. Furthermore, adding the
13466 new constant pool entry may cause the pool to start FIX_SIZE bytes
13467 earlier. */
13468 if (minipool_vector_head &&
13469 (fix->address + get_attr_length (fix->insn)
13470 >= minipool_vector_head->max_address - fix->fix_size))
13471 return NULL;
13473 /* Scan the pool to see if a constant with the same value has
13474 already been added. While we are doing this, also note the
13475 location where we must insert the constant if it doesn't already
13476 exist. */
13477 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13479 if (GET_CODE (fix->value) == GET_CODE (mp->value)
13480 && fix->mode == mp->mode
13481 && (!LABEL_P (fix->value)
13482 || (CODE_LABEL_NUMBER (fix->value)
13483 == CODE_LABEL_NUMBER (mp->value)))
13484 && rtx_equal_p (fix->value, mp->value))
13486 /* More than one fix references this entry. */
13487 mp->refcount++;
13488 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
13491 /* Note the insertion point if necessary. */
13492 if (max_mp == NULL
13493 && mp->max_address > max_address)
13494 max_mp = mp;
13496 /* If we are inserting an 8-byte aligned quantity and
13497 we have not already found an insertion point, then
13498 make sure that all such 8-byte aligned quantities are
13499 placed at the start of the pool. */
13500 if (ARM_DOUBLEWORD_ALIGN
13501 && max_mp == NULL
13502 && fix->fix_size >= 8
13503 && mp->fix_size < 8)
13505 max_mp = mp;
13506 max_address = mp->max_address;
13510 /* The value is not currently in the minipool, so we need to create
13511 a new entry for it. If MAX_MP is NULL, the entry will be put on
13512 the end of the list since the placement is less constrained than
13513 any existing entry. Otherwise, we insert the new fix before
13514 MAX_MP and, if necessary, adjust the constraints on the other
13515 entries. */
13516 mp = XNEW (Mnode);
13517 mp->fix_size = fix->fix_size;
13518 mp->mode = fix->mode;
13519 mp->value = fix->value;
13520 mp->refcount = 1;
13521 /* Not yet required for a backwards ref. */
13522 mp->min_address = -65536;
13524 if (max_mp == NULL)
13526 mp->max_address = max_address;
13527 mp->next = NULL;
13528 mp->prev = minipool_vector_tail;
13530 if (mp->prev == NULL)
13532 minipool_vector_head = mp;
13533 minipool_vector_label = gen_label_rtx ();
13535 else
13536 mp->prev->next = mp;
13538 minipool_vector_tail = mp;
13540 else
13542 if (max_address > max_mp->max_address - mp->fix_size)
13543 mp->max_address = max_mp->max_address - mp->fix_size;
13544 else
13545 mp->max_address = max_address;
13547 mp->next = max_mp;
13548 mp->prev = max_mp->prev;
13549 max_mp->prev = mp;
13550 if (mp->prev != NULL)
13551 mp->prev->next = mp;
13552 else
13553 minipool_vector_head = mp;
13556 /* Save the new entry. */
13557 max_mp = mp;
13559 /* Scan over the preceding entries and adjust their addresses as
13560 required. */
13561 while (mp->prev != NULL
13562 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
13564 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
13565 mp = mp->prev;
13568 return max_mp;
13571 static Mnode *
13572 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
13573 HOST_WIDE_INT min_address)
13575 HOST_WIDE_INT offset;
13577 /* The code below assumes these are different. */
13578 gcc_assert (mp != min_mp);
13580 if (min_mp == NULL)
13582 if (min_address > mp->min_address)
13583 mp->min_address = min_address;
13585 else
13587 /* We will adjust this below if it is too loose. */
13588 mp->min_address = min_address;
13590 /* Unlink MP from its current position. Since min_mp is non-null,
13591 mp->next must be non-null. */
13592 mp->next->prev = mp->prev;
13593 if (mp->prev != NULL)
13594 mp->prev->next = mp->next;
13595 else
13596 minipool_vector_head = mp->next;
13598 /* Reinsert it after MIN_MP. */
13599 mp->prev = min_mp;
13600 mp->next = min_mp->next;
13601 min_mp->next = mp;
13602 if (mp->next != NULL)
13603 mp->next->prev = mp;
13604 else
13605 minipool_vector_tail = mp;
13608 min_mp = mp;
13610 offset = 0;
13611 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13613 mp->offset = offset;
13614 if (mp->refcount > 0)
13615 offset += mp->fix_size;
13617 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
13618 mp->next->min_address = mp->min_address + mp->fix_size;
13621 return min_mp;
13624 /* Add a constant to the minipool for a backward reference. Returns the
13625 node added or NULL if the constant will not fit in this pool.
13627 Note that the code for insertion for a backwards reference can be
13628 somewhat confusing because the calculated offsets for each fix do
13629 not take into account the size of the pool (which is still under
13630 construction). */
13631 static Mnode *
13632 add_minipool_backward_ref (Mfix *fix)
13634 /* If set, min_mp is the last pool_entry that has a lower constraint
13635 than the one we are trying to add. */
13636 Mnode *min_mp = NULL;
13637 /* This can be negative, since it is only a constraint. */
13638 HOST_WIDE_INT min_address = fix->address - fix->backwards;
13639 Mnode *mp;
13641 /* If we can't reach the current pool from this insn, or if we can't
13642 insert this entry at the end of the pool without pushing other
13643 fixes out of range, then we don't try. This ensures that we
13644 can't fail later on. */
13645 if (min_address >= minipool_barrier->address
13646 || (minipool_vector_tail->min_address + fix->fix_size
13647 >= minipool_barrier->address))
13648 return NULL;
13650 /* Scan the pool to see if a constant with the same value has
13651 already been added. While we are doing this, also note the
13652 location where we must insert the constant if it doesn't already
13653 exist. */
13654 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
13656 if (GET_CODE (fix->value) == GET_CODE (mp->value)
13657 && fix->mode == mp->mode
13658 && (!LABEL_P (fix->value)
13659 || (CODE_LABEL_NUMBER (fix->value)
13660 == CODE_LABEL_NUMBER (mp->value)))
13661 && rtx_equal_p (fix->value, mp->value)
13662 /* Check that there is enough slack to move this entry to the
13663 end of the table (this is conservative). */
13664 && (mp->max_address
13665 > (minipool_barrier->address
13666 + minipool_vector_tail->offset
13667 + minipool_vector_tail->fix_size)))
13669 mp->refcount++;
13670 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
13673 if (min_mp != NULL)
13674 mp->min_address += fix->fix_size;
13675 else
13677 /* Note the insertion point if necessary. */
13678 if (mp->min_address < min_address)
13680 /* For now, we do not allow the insertion of 8-byte alignment
13681 requiring nodes anywhere but at the start of the pool. */
13682 if (ARM_DOUBLEWORD_ALIGN
13683 && fix->fix_size >= 8 && mp->fix_size < 8)
13684 return NULL;
13685 else
13686 min_mp = mp;
13688 else if (mp->max_address
13689 < minipool_barrier->address + mp->offset + fix->fix_size)
13691 /* Inserting before this entry would push the fix beyond
13692 its maximum address (which can happen if we have
13693 re-located a forwards fix); force the new fix to come
13694 after it. */
13695 if (ARM_DOUBLEWORD_ALIGN
13696 && fix->fix_size >= 8 && mp->fix_size < 8)
13697 return NULL;
13698 else
13700 min_mp = mp;
13701 min_address = mp->min_address + fix->fix_size;
13704 /* Do not insert a non-8-byte aligned quantity before 8-byte
13705 aligned quantities. */
13706 else if (ARM_DOUBLEWORD_ALIGN
13707 && fix->fix_size < 8
13708 && mp->fix_size >= 8)
13710 min_mp = mp;
13711 min_address = mp->min_address + fix->fix_size;
13716 /* We need to create a new entry. */
13717 mp = XNEW (Mnode);
13718 mp->fix_size = fix->fix_size;
13719 mp->mode = fix->mode;
13720 mp->value = fix->value;
13721 mp->refcount = 1;
13722 mp->max_address = minipool_barrier->address + 65536;
13724 mp->min_address = min_address;
13726 if (min_mp == NULL)
13728 mp->prev = NULL;
13729 mp->next = minipool_vector_head;
13731 if (mp->next == NULL)
13733 minipool_vector_tail = mp;
13734 minipool_vector_label = gen_label_rtx ();
13736 else
13737 mp->next->prev = mp;
13739 minipool_vector_head = mp;
13741 else
13743 mp->next = min_mp->next;
13744 mp->prev = min_mp;
13745 min_mp->next = mp;
13747 if (mp->next != NULL)
13748 mp->next->prev = mp;
13749 else
13750 minipool_vector_tail = mp;
13753 /* Save the new entry. */
13754 min_mp = mp;
13756 if (mp->prev)
13757 mp = mp->prev;
13758 else
13759 mp->offset = 0;
13761 /* Scan over the following entries and adjust their offsets. */
13762 while (mp->next != NULL)
13764 if (mp->next->min_address < mp->min_address + mp->fix_size)
13765 mp->next->min_address = mp->min_address + mp->fix_size;
13767 if (mp->refcount)
13768 mp->next->offset = mp->offset + mp->fix_size;
13769 else
13770 mp->next->offset = mp->offset;
13772 mp = mp->next;
13775 return min_mp;
13778 static void
13779 assign_minipool_offsets (Mfix *barrier)
13781 HOST_WIDE_INT offset = 0;
13782 Mnode *mp;
13784 minipool_barrier = barrier;
13786 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13788 mp->offset = offset;
13790 if (mp->refcount > 0)
13791 offset += mp->fix_size;
13795 /* Output the literal table. */
13796 static void
13797 dump_minipool (rtx scan)
13799 Mnode * mp;
13800 Mnode * nmp;
13801 int align64 = 0;
13803 if (ARM_DOUBLEWORD_ALIGN)
13804 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
13805 if (mp->refcount > 0 && mp->fix_size >= 8)
13807 align64 = 1;
13808 break;
13811 if (dump_file)
13812 fprintf (dump_file,
13813 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
13814 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
13816 scan = emit_label_after (gen_label_rtx (), scan);
13817 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
13818 scan = emit_label_after (minipool_vector_label, scan);
13820 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
13822 if (mp->refcount > 0)
13824 if (dump_file)
13826 fprintf (dump_file,
13827 ";; Offset %u, min %ld, max %ld ",
13828 (unsigned) mp->offset, (unsigned long) mp->min_address,
13829 (unsigned long) mp->max_address);
13830 arm_print_value (dump_file, mp->value);
13831 fputc ('\n', dump_file);
13834 switch (mp->fix_size)
13836 #ifdef HAVE_consttable_1
13837 case 1:
13838 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
13839 break;
13841 #endif
13842 #ifdef HAVE_consttable_2
13843 case 2:
13844 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
13845 break;
13847 #endif
13848 #ifdef HAVE_consttable_4
13849 case 4:
13850 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
13851 break;
13853 #endif
13854 #ifdef HAVE_consttable_8
13855 case 8:
13856 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
13857 break;
13859 #endif
13860 #ifdef HAVE_consttable_16
13861 case 16:
13862 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
13863 break;
13865 #endif
13866 default:
13867 gcc_unreachable ();
13871 nmp = mp->next;
13872 free (mp);
13875 minipool_vector_head = minipool_vector_tail = NULL;
13876 scan = emit_insn_after (gen_consttable_end (), scan);
13877 scan = emit_barrier_after (scan);
13880 /* Return the cost of forcibly inserting a barrier after INSN. */
13881 static int
13882 arm_barrier_cost (rtx insn)
13884 /* Basing the location of the pool on the loop depth is preferable,
13885 but at the moment, the basic block information seems to be
13886 corrupt by this stage of the compilation. */
13887 int base_cost = 50;
13888 rtx next = next_nonnote_insn (insn);
13890 if (next != NULL && LABEL_P (next))
13891 base_cost -= 20;
13893 switch (GET_CODE (insn))
13895 case CODE_LABEL:
13896 /* It will always be better to place the table before the label, rather
13897 than after it. */
13898 return 50;
13900 case INSN:
13901 case CALL_INSN:
13902 return base_cost;
13904 case JUMP_INSN:
13905 return base_cost - 10;
13907 default:
13908 return base_cost + 10;
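/* Illustrative comparison of the costs above: with the default
   base_cost of 50, a barrier after an ordinary INSN costs 50 (or 30 if
   a label follows), while one after a JUMP_INSN costs 40 (or 20), so
   create_fix_barrier below prefers points where control flow already
   breaks.  */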
13912 /* Find the best place in the insn stream in the range
13913 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
13914 Create the barrier by inserting a jump and add a new fix entry for
13915 it. */
13916 static Mfix *
13917 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
13919 HOST_WIDE_INT count = 0;
13920 rtx barrier;
13921 rtx from = fix->insn;
13922 /* The instruction after which we will insert the jump. */
13923 rtx selected = NULL;
13924 int selected_cost;
13925 /* The address at which the jump instruction will be placed. */
13926 HOST_WIDE_INT selected_address;
13927 Mfix * new_fix;
13928 HOST_WIDE_INT max_count = max_address - fix->address;
13929 rtx label = gen_label_rtx ();
13931 selected_cost = arm_barrier_cost (from);
13932 selected_address = fix->address;
13934 while (from && count < max_count)
13936 rtx tmp;
13937 int new_cost;
13939 /* This code shouldn't have been called if there was a natural barrier
13940 within range. */
13941 gcc_assert (!BARRIER_P (from));
13943 /* Count the length of this insn. This must stay in sync with the
13944 code that pushes minipool fixes. */
13945 if (LABEL_P (from))
13946 count += get_label_padding (from);
13947 else
13948 count += get_attr_length (from);
13950 /* If there is a jump table, add its length. */
13951 tmp = is_jump_table (from);
13952 if (tmp != NULL)
13954 count += get_jump_table_size (tmp);
13956 /* Jump tables aren't in a basic block, so base the cost on
13957 the dispatch insn. If we select this location, we will
13958 still put the pool after the table. */
13959 new_cost = arm_barrier_cost (from);
13961 if (count < max_count
13962 && (!selected || new_cost <= selected_cost))
13964 selected = tmp;
13965 selected_cost = new_cost;
13966 selected_address = fix->address + count;
13969 /* Continue after the dispatch table. */
13970 from = NEXT_INSN (tmp);
13971 continue;
13974 new_cost = arm_barrier_cost (from);
13976 if (count < max_count
13977 && (!selected || new_cost <= selected_cost))
13979 selected = from;
13980 selected_cost = new_cost;
13981 selected_address = fix->address + count;
13984 from = NEXT_INSN (from);
13987 /* Make sure that we found a place to insert the jump. */
13988 gcc_assert (selected);
13990 /* Make sure we do not split a call and its corresponding
13991 CALL_ARG_LOCATION note. */
13992 if (CALL_P (selected))
13994 rtx next = NEXT_INSN (selected);
13995 if (next && NOTE_P (next)
13996 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
13997 selected = next;
14000 /* Create a new JUMP_INSN that branches around a barrier. */
14001 from = emit_jump_insn_after (gen_jump (label), selected);
14002 JUMP_LABEL (from) = label;
14003 barrier = emit_barrier_after (from);
14004 emit_label_after (label, barrier);
14006 /* Create a minipool barrier entry for the new barrier. */
14007 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
14008 new_fix->insn = barrier;
14009 new_fix->address = selected_address;
14010 new_fix->next = fix->next;
14011 fix->next = new_fix;
14013 return new_fix;
14016 /* Record that there is a natural barrier in the insn stream at
14017 ADDRESS. */
14018 static void
14019 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
14021 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
14023 fix->insn = insn;
14024 fix->address = address;
14026 fix->next = NULL;
14027 if (minipool_fix_head != NULL)
14028 minipool_fix_tail->next = fix;
14029 else
14030 minipool_fix_head = fix;
14032 minipool_fix_tail = fix;
14035 /* Record INSN, which will need fixing up to load a value from the
14036 minipool. ADDRESS is the offset of the insn since the start of the
14037 function; LOC is a pointer to the part of the insn which requires
14038 fixing; VALUE is the constant that must be loaded, which is of type
14039 MODE. */
14040 static void
14041 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
14042 enum machine_mode mode, rtx value)
14044 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
14046 fix->insn = insn;
14047 fix->address = address;
14048 fix->loc = loc;
14049 fix->mode = mode;
14050 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
14051 fix->value = value;
14052 fix->forwards = get_attr_pool_range (insn);
14053 fix->backwards = get_attr_neg_pool_range (insn);
14054 fix->minipool = NULL;
14056 /* If an insn doesn't have a range defined for it, then it isn't
14057 expecting to be reworked by this code. Better to stop now than
14058 to generate duff assembly code. */
14059 gcc_assert (fix->forwards || fix->backwards);
14061 /* If an entry requires 8-byte alignment then assume all constant pools
14062 require 4 bytes of padding. Trying to do this later on a per-pool
14063 basis is awkward because existing pool entries have to be modified. */
14064 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
14065 minipool_pad = 4;
14067 if (dump_file)
14069 fprintf (dump_file,
14070 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
14071 GET_MODE_NAME (mode),
14072 INSN_UID (insn), (unsigned long) address,
14073 -1 * (long)fix->backwards, (long)fix->forwards);
14074 arm_print_value (dump_file, fix->value);
14075 fprintf (dump_file, "\n");
14078 /* Add it to the chain of fixes. */
14079 fix->next = NULL;
14081 if (minipool_fix_head != NULL)
14082 minipool_fix_tail->next = fix;
14083 else
14084 minipool_fix_head = fix;
14086 minipool_fix_tail = fix;
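/* The forwards/backwards values come from the pool_range and
   neg_pool_range attributes in the machine description.  As a rough
   guide only (the .md files are authoritative): an ARM-mode LDR literal
   can reach about 4K in either direction, a VFP VLDR about 1K, and a
   Thumb-1 PC-relative load only reaches forwards, which is why a zero
   backwards range is legitimate in the assertion above.  */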
14089 /* Return the cost of synthesizing a 64-bit constant VAL inline.
14090 Returns the number of insns needed, or 99 if we don't know how to
14091 do it. */
14092 int
14093 arm_const_double_inline_cost (rtx val)
14095 rtx lowpart, highpart;
14096 enum machine_mode mode;
14098 mode = GET_MODE (val);
14100 if (mode == VOIDmode)
14101 mode = DImode;
14103 gcc_assert (GET_MODE_SIZE (mode) == 8);
14105 lowpart = gen_lowpart (SImode, val);
14106 highpart = gen_highpart_mode (SImode, mode, val);
14108 gcc_assert (CONST_INT_P (lowpart));
14109 gcc_assert (CONST_INT_P (highpart));
14111 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
14112 NULL_RTX, NULL_RTX, 0, 0)
14113 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
14114 NULL_RTX, NULL_RTX, 0, 0));
14117 /* Return true if it is worthwhile to split a 64-bit constant into two
14118 32-bit operations. This is the case if optimizing for size, or
14119 if we have load delay slots, or if one 32-bit part can be done with
14120 a single data operation. */
14121 bool
14122 arm_const_double_by_parts (rtx val)
14124 enum machine_mode mode = GET_MODE (val);
14125 rtx part;
14127 if (optimize_size || arm_ld_sched)
14128 return true;
14130 if (mode == VOIDmode)
14131 mode = DImode;
14133 part = gen_highpart_mode (SImode, mode, val);
14135 gcc_assert (CONST_INT_P (part));
14137 if (const_ok_for_arm (INTVAL (part))
14138 || const_ok_for_arm (~INTVAL (part)))
14139 return true;
14141 part = gen_lowpart (SImode, val);
14143 gcc_assert (CONST_INT_P (part));
14145 if (const_ok_for_arm (INTVAL (part))
14146 || const_ok_for_arm (~INTVAL (part)))
14147 return true;
14149 return false;
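/* Example (constant invented): for the DImode value 0x000000ff00000001
   both the high part 0xff and the low part 0x1 are valid ARM
   immediates, so splitting into two SImode moves wins even when
   arm_ld_sched is false and we are not optimizing for size.  */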
14152 /* Return true if it is possible to inline both the high and low parts
14153 of a 64-bit constant into 32-bit data processing instructions. */
14154 bool
14155 arm_const_double_by_immediates (rtx val)
14157 enum machine_mode mode = GET_MODE (val);
14158 rtx part;
14160 if (mode == VOIDmode)
14161 mode = DImode;
14163 part = gen_highpart_mode (SImode, mode, val);
14165 gcc_assert (CONST_INT_P (part));
14167 if (!const_ok_for_arm (INTVAL (part)))
14168 return false;
14170 part = gen_lowpart (SImode, val);
14172 gcc_assert (CONST_INT_P (part));
14174 if (!const_ok_for_arm (INTVAL (part)))
14175 return false;
14177 return true;
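/* Unlike arm_const_double_by_parts, both halves must be valid
   immediates here: 0x000000ff000000ff (invented) qualifies, whereas
   0x000000ff00000101 does not, because 0x101 cannot be encoded as a
   rotated 8-bit ARM immediate.  */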
14180 /* Scan INSN and note any of its operands that need fixing.
14181 If DO_PUSHES is false we do not actually push any of the fixups
14182 needed. */
14183 static void
14184 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
14186 int opno;
14188 extract_insn (insn);
14190 if (!constrain_operands (1))
14191 fatal_insn_not_found (insn);
14193 if (recog_data.n_alternatives == 0)
14194 return;
14196 /* Fill in recog_op_alt with information about the constraints of
14197 this insn. */
14198 preprocess_constraints ();
14200 for (opno = 0; opno < recog_data.n_operands; opno++)
14202 /* Things we need to fix can only occur in inputs. */
14203 if (recog_data.operand_type[opno] != OP_IN)
14204 continue;
14206 /* If this alternative is a memory reference, then any mention
14207 of constants in this alternative is really to fool reload
14208 into allowing us to accept one there. We need to fix them up
14209 now so that we output the right code. */
14210 if (recog_op_alt[opno][which_alternative].memory_ok)
14212 rtx op = recog_data.operand[opno];
14214 if (CONSTANT_P (op))
14216 if (do_pushes)
14217 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
14218 recog_data.operand_mode[opno], op);
14220 else if (MEM_P (op)
14221 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
14222 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
14224 if (do_pushes)
14226 rtx cop = avoid_constant_pool_reference (op);
14228 /* Casting the address of something to a mode narrower
14229 than a word can cause avoid_constant_pool_reference()
14230 to return the pool reference itself. That's no good to
14231 us here. Let's just hope that we can use the
14232 constant pool value directly. */
14233 if (op == cop)
14234 cop = get_pool_constant (XEXP (op, 0));
14236 push_minipool_fix (insn, address,
14237 recog_data.operand_loc[opno],
14238 recog_data.operand_mode[opno], cop);
14245 return;
14248 /* Rewrite move insn into subtract of 0 if the condition codes will
14249 be useful in next conditional jump insn. */
14251 static void
14252 thumb1_reorg (void)
14254 basic_block bb;
14256 FOR_EACH_BB (bb)
14258 rtx set, dest, src;
14259 rtx pat, op0;
14260 rtx prev, insn = BB_END (bb);
14262 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
14263 insn = PREV_INSN (insn);
14265 /* Find the last cbranchsi4_insn in basic block BB. */
14266 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
14267 continue;
14269 /* Find the first non-note insn before INSN in basic block BB. */
14270 gcc_assert (insn != BB_HEAD (bb));
14271 prev = PREV_INSN (insn);
14272 while (prev != BB_HEAD (bb) && (NOTE_P (prev) || DEBUG_INSN_P (prev)))
14273 prev = PREV_INSN (prev);
14275 set = single_set (prev);
14276 if (!set)
14277 continue;
14279 dest = SET_DEST (set);
14280 src = SET_SRC (set);
14281 if (!low_register_operand (dest, SImode)
14282 || !low_register_operand (src, SImode))
14283 continue;
14285 pat = PATTERN (insn);
14286 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
14287 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
14288 in INSN. Don't need to check dest since cprop_hardreg pass propagates
14289 src into INSN. */
14290 if (REGNO (op0) == REGNO (src))
14292 dest = copy_rtx (dest);
14293 src = copy_rtx (src);
14294 src = gen_rtx_MINUS (SImode, src, const0_rtx);
14295 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
14296 INSN_CODE (prev) = -1;
14297 /* Set test register in INSN to dest. */
14298 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
14299 INSN_CODE (insn) = -1;
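/* Sketch of the rewrite above (registers invented): a Thumb-1 sequence
   like "mov r1, r0; cmp r0, #0; bne ..." becomes
   "subs r1, r0, #0; bne ...", the idea being that SUBS already sets the
   flags the branch needs, so the separate compare can be dropped when
   the cbranch is output.  */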
14304 /* Convert instructions to their cc-clobbering variant if possible, since
14305 that allows us to use smaller encodings. */
14307 static void
14308 thumb2_reorg (void)
14310 basic_block bb;
14311 regset_head live;
14313 INIT_REG_SET (&live);
14315 /* We are freeing block_for_insn in the toplev to keep compatibility
14316 with old MDEP_REORGS that are not CFG based. Recompute it now. */
14317 compute_bb_for_insn ();
14318 df_analyze ();
14320 FOR_EACH_BB (bb)
14322 rtx insn;
14324 COPY_REG_SET (&live, DF_LR_OUT (bb));
14325 df_simulate_initialize_backwards (bb, &live);
14326 FOR_BB_INSNS_REVERSE (bb, insn)
14328 if (NONJUMP_INSN_P (insn)
14329 && !REGNO_REG_SET_P (&live, CC_REGNUM)
14330 && GET_CODE (PATTERN (insn)) == SET)
14332 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
14333 rtx pat = PATTERN (insn);
14334 rtx dst = XEXP (pat, 0);
14335 rtx src = XEXP (pat, 1);
14336 rtx op0 = NULL_RTX, op1 = NULL_RTX;
14338 if (!OBJECT_P (src))
14339 op0 = XEXP (src, 0);
14341 if (BINARY_P (src))
14342 op1 = XEXP (src, 1);
14344 if (low_register_operand (dst, SImode))
14346 switch (GET_CODE (src))
14348 case PLUS:
14349 /* Adding two registers and storing the result
14350 in the first source is already a 16-bit
14351 operation. */
14352 if (rtx_equal_p (dst, op0)
14353 && register_operand (op1, SImode))
14354 break;
14356 if (low_register_operand (op0, SImode))
14358 /* ADDS <Rd>,<Rn>,<Rm> */
14359 if (low_register_operand (op1, SImode))
14360 action = CONV;
14361 /* ADDS <Rdn>,#<imm8> */
14362 /* SUBS <Rdn>,#<imm8> */
14363 else if (rtx_equal_p (dst, op0)
14364 && CONST_INT_P (op1)
14365 && IN_RANGE (INTVAL (op1), -255, 255))
14366 action = CONV;
14367 /* ADDS <Rd>,<Rn>,#<imm3> */
14368 /* SUBS <Rd>,<Rn>,#<imm3> */
14369 else if (CONST_INT_P (op1)
14370 && IN_RANGE (INTVAL (op1), -7, 7))
14371 action = CONV;
14373 /* ADCS <Rd>, <Rn> */
14374 else if (GET_CODE (XEXP (src, 0)) == PLUS
14375 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
14376 && low_register_operand (XEXP (XEXP (src, 0), 1),
14377 SImode)
14378 && COMPARISON_P (op1)
14379 && cc_register (XEXP (op1, 0), VOIDmode)
14380 && maybe_get_arm_condition_code (op1) == ARM_CS
14381 && XEXP (op1, 1) == const0_rtx)
14382 action = CONV;
14383 break;
14385 case MINUS:
14386 /* RSBS <Rd>,<Rn>,#0
14387 Not handled here: see NEG below. */
14388 /* SUBS <Rd>,<Rn>,#<imm3>
14389 SUBS <Rdn>,#<imm8>
14390 Not handled here: see PLUS above. */
14391 /* SUBS <Rd>,<Rn>,<Rm> */
14392 if (low_register_operand (op0, SImode)
14393 && low_register_operand (op1, SImode))
14394 action = CONV;
14395 break;
14397 case MULT:
14398 /* MULS <Rdm>,<Rn>,<Rdm>
14399 As an exception to the rule, this is only used
14400 when optimizing for size since MULS is slow on all
14401 known implementations. We do not even want to use
14402 MULS in cold code, if optimizing for speed, so we
14403 test the global flag here. */
14404 if (!optimize_size)
14405 break;
14406 /* else fall through. */
14407 case AND:
14408 case IOR:
14409 case XOR:
14410 /* ANDS <Rdn>,<Rm> */
14411 if (rtx_equal_p (dst, op0)
14412 && low_register_operand (op1, SImode))
14413 action = CONV;
14414 else if (rtx_equal_p (dst, op1)
14415 && low_register_operand (op0, SImode))
14416 action = SWAP_CONV;
14417 break;
14419 case ASHIFTRT:
14420 case ASHIFT:
14421 case LSHIFTRT:
14422 /* ASRS <Rdn>,<Rm> */
14423 /* LSRS <Rdn>,<Rm> */
14424 /* LSLS <Rdn>,<Rm> */
14425 if (rtx_equal_p (dst, op0)
14426 && low_register_operand (op1, SImode))
14427 action = CONV;
14428 /* ASRS <Rd>,<Rm>,#<imm5> */
14429 /* LSRS <Rd>,<Rm>,#<imm5> */
14430 /* LSLS <Rd>,<Rm>,#<imm5> */
14431 else if (low_register_operand (op0, SImode)
14432 && CONST_INT_P (op1)
14433 && IN_RANGE (INTVAL (op1), 0, 31))
14434 action = CONV;
14435 break;
14437 case ROTATERT:
14438 /* RORS <Rdn>,<Rm> */
14439 if (rtx_equal_p (dst, op0)
14440 && low_register_operand (op1, SImode))
14441 action = CONV;
14442 break;
14444 case NOT:
14445 case NEG:
14446 /* MVNS <Rd>,<Rm> */
14447 /* NEGS <Rd>,<Rm> (a.k.a. RSBS) */
14448 if (low_register_operand (op0, SImode))
14449 action = CONV;
14450 break;
14452 case CONST_INT:
14453 /* MOVS <Rd>,#<imm8> */
14454 if (CONST_INT_P (src)
14455 && IN_RANGE (INTVAL (src), 0, 255))
14456 action = CONV;
14457 break;
14459 case REG:
14460 /* MOVS and MOV<c> with registers have different
14461 encodings, so are not relevant here. */
14462 break;
14464 default:
14465 break;
14469 if (action != SKIP)
14471 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
14472 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
14473 rtvec vec;
14475 if (action == SWAP_CONV)
14477 src = copy_rtx (src);
14478 XEXP (src, 0) = op1;
14479 XEXP (src, 1) = op0;
14480 pat = gen_rtx_SET (VOIDmode, dst, src);
14481 vec = gen_rtvec (2, pat, clobber);
14483 else /* action == CONV */
14484 vec = gen_rtvec (2, pat, clobber);
14486 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
14487 INSN_CODE (insn) = -1;
14491 if (NONDEBUG_INSN_P (insn))
14492 df_simulate_one_insn_backwards (bb, insn, &live);
14496 CLEAR_REG_SET (&live);
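/* Sketch of what the CONV rewrite gains (registers invented): outside an
   IT block the three-operand "add r0, r1, r2" only has a 32-bit Thumb-2
   encoding, whereas the flag-setting "adds r0, r1, r2" fits in 16 bits;
   recording the CC clobber in the pattern is what lets the output
   templates choose the short form once the flags are known to be dead.  */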
14499 /* GCC puts the pool in the wrong place for ARM, since we can only
14500 load addresses a limited distance around the pc. We do some
14501 special munging to move the constant pool values to the correct
14502 point in the code. */
14503 static void
14504 arm_reorg (void)
14506 rtx insn;
14507 HOST_WIDE_INT address = 0;
14508 Mfix * fix;
14510 if (TARGET_THUMB1)
14511 thumb1_reorg ();
14512 else if (TARGET_THUMB2)
14513 thumb2_reorg ();
14515 /* Ensure all insns that must be split have been split at this point.
14516 Otherwise, the pool placement code below may compute incorrect
14517 insn lengths. Note that when optimizing, all insns have already
14518 been split at this point. */
14519 if (!optimize)
14520 split_all_insns_noflow ();
14522 minipool_fix_head = minipool_fix_tail = NULL;
14524 /* The first insn must always be a note, or the code below won't
14525 scan it properly. */
14526 insn = get_insns ();
14527 gcc_assert (NOTE_P (insn));
14528 minipool_pad = 0;
14530 /* Scan all the insns and record the operands that will need fixing. */
14531 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
14533 if (BARRIER_P (insn))
14534 push_minipool_barrier (insn, address);
14535 else if (INSN_P (insn))
14537 rtx table;
14539 note_invalid_constants (insn, address, true);
14540 address += get_attr_length (insn);
14542 /* If the insn is a vector jump, add the size of the table
14543 and skip the table. */
14544 if ((table = is_jump_table (insn)) != NULL)
14546 address += get_jump_table_size (table);
14547 insn = table;
14550 else if (LABEL_P (insn))
14551 /* Add the worst-case padding due to alignment. We don't add
14552 the _current_ padding because the minipool insertions
14553 themselves might change it. */
14554 address += get_label_padding (insn);
14557 fix = minipool_fix_head;
14559 /* Now scan the fixups and perform the required changes. */
14560 while (fix)
14562 Mfix * ftmp;
14563 Mfix * fdel;
14564 Mfix * last_added_fix;
14565 Mfix * last_barrier = NULL;
14566 Mfix * this_fix;
14568 /* Skip any further barriers before the next fix. */
14569 while (fix && BARRIER_P (fix->insn))
14570 fix = fix->next;
14572 /* No more fixes. */
14573 if (fix == NULL)
14574 break;
14576 last_added_fix = NULL;
14578 for (ftmp = fix; ftmp; ftmp = ftmp->next)
14580 if (BARRIER_P (ftmp->insn))
14582 if (ftmp->address >= minipool_vector_head->max_address)
14583 break;
14585 last_barrier = ftmp;
14587 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
14588 break;
14590 last_added_fix = ftmp; /* Keep track of the last fix added. */
14593 /* If we found a barrier, drop back to that; any fixes that we
14594 could have reached but come after the barrier will now go in
14595 the next mini-pool. */
14596 if (last_barrier != NULL)
14598 /* Reduce the refcount for those fixes that won't go into this
14599 pool after all. */
14600 for (fdel = last_barrier->next;
14601 fdel && fdel != ftmp;
14602 fdel = fdel->next)
14604 fdel->minipool->refcount--;
14605 fdel->minipool = NULL;
14608 ftmp = last_barrier;
14610 else
14612 /* ftmp is the first fix that we can't fit into this pool and
14613 there are no natural barriers that we could use. Insert a
14614 new barrier in the code somewhere between the previous
14615 fix and this one, and arrange to jump around it. */
14616 HOST_WIDE_INT max_address;
14618 /* The last item on the list of fixes must be a barrier, so
14619 we can never run off the end of the list of fixes without
14620 last_barrier being set. */
14621 gcc_assert (ftmp);
14623 max_address = minipool_vector_head->max_address;
14624 /* Check that there isn't another fix that is in range that
14625 we couldn't fit into this pool because the pool was
14626 already too large: we need to put the pool before such an
14627 instruction. The pool itself may come just after the
14628 fix because create_fix_barrier also allows space for a
14629 jump instruction. */
14630 if (ftmp->address < max_address)
14631 max_address = ftmp->address + 1;
14633 last_barrier = create_fix_barrier (last_added_fix, max_address);
14636 assign_minipool_offsets (last_barrier);
14638 while (ftmp)
14640 if (!BARRIER_P (ftmp->insn)
14641 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
14642 == NULL))
14643 break;
14645 ftmp = ftmp->next;
14648 /* Scan over the fixes we have identified for this pool, fixing them
14649 up and adding the constants to the pool itself. */
14650 for (this_fix = fix; this_fix && ftmp != this_fix;
14651 this_fix = this_fix->next)
14652 if (!BARRIER_P (this_fix->insn))
14654 rtx addr
14655 = plus_constant (Pmode,
14656 gen_rtx_LABEL_REF (VOIDmode,
14657 minipool_vector_label),
14658 this_fix->minipool->offset);
14659 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
14662 dump_minipool (last_barrier->insn);
14663 fix = ftmp;
14666 /* From now on we must synthesize any constants that we can't handle
14667 directly. This can happen if the RTL gets split during final
14668 instruction generation. */
14669 after_arm_reorg = 1;
14671 /* Free the minipool memory. */
14672 obstack_free (&minipool_obstack, minipool_startobj);
14675 /* Routines to output assembly language. */
14677 /* If the rtx is the correct value then return the string of the number.
14678 In this way we can ensure that valid double constants are generated even
14679 when cross compiling. */
14680 const char *
14681 fp_immediate_constant (rtx x)
14683 REAL_VALUE_TYPE r;
14685 if (!fp_consts_inited)
14686 init_fp_table ();
14688 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14690 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
14691 return "0";
14694 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
14695 static const char *
14696 fp_const_from_val (REAL_VALUE_TYPE *r)
14698 if (!fp_consts_inited)
14699 init_fp_table ();
14701 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
14702 return "0";
14705 /* OPERANDS[0] is the entire list of insns that constitute pop,
14706 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
14707 is in the list, UPDATE is true iff the list contains explicit
14708 update of base register. */
14709 void
14710 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
14711 bool update)
14713 int i;
14714 char pattern[100];
14715 int offset;
14716 const char *conditional;
14717 int num_saves = XVECLEN (operands[0], 0);
14718 unsigned int regno;
14719 unsigned int regno_base = REGNO (operands[1]);
14721 offset = 0;
14722 offset += update ? 1 : 0;
14723 offset += return_pc ? 1 : 0;
14725 /* Is the base register in the list? */
14726 for (i = offset; i < num_saves; i++)
14728 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
14729 /* If SP is in the list, then the base register must be SP. */
14730 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
14731 /* If base register is in the list, there must be no explicit update. */
14732 if (regno == regno_base)
14733 gcc_assert (!update);
14736 conditional = reverse ? "%?%D0" : "%?%d0";
14737 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
14739 /* Output pop (not stmfd) because it has a shorter encoding. */
14740 gcc_assert (update);
14741 sprintf (pattern, "pop%s\t{", conditional);
14743 else
14745 /* Output ldmfd when the base register is SP, otherwise output ldmia.
14746 It's just a convention; their semantics are identical. */
14747 if (regno_base == SP_REGNUM)
14748 sprintf (pattern, "ldm%sfd\t", conditional);
14749 else if (TARGET_UNIFIED_ASM)
14750 sprintf (pattern, "ldmia%s\t", conditional);
14751 else
14752 sprintf (pattern, "ldm%sia\t", conditional);
14754 strcat (pattern, reg_names[regno_base]);
14755 if (update)
14756 strcat (pattern, "!, {");
14757 else
14758 strcat (pattern, ", {");
14761 /* Output the first destination register. */
14762 strcat (pattern,
14763 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
14765 /* Output the rest of the destination registers. */
14766 for (i = offset + 1; i < num_saves; i++)
14768 strcat (pattern, ", ");
14769 strcat (pattern,
14770 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
14773 strcat (pattern, "}");
14775 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
14776 strcat (pattern, "^");
14778 output_asm_insn (pattern, &cond);
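/* Typical strings built above (register lists invented):
   "pop {r4, r5, pc}" when popping via SP with unified syntax, or
   "ldmfd sp!, {r4, r5, pc}" otherwise; the trailing "^" is only added
   for interrupt handlers that return through PC.  */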
14782 /* Output the assembly for a store multiple. */
14784 const char *
14785 vfp_output_fstmd (rtx * operands)
14787 char pattern[100];
14788 int p;
14789 int base;
14790 int i;
14792 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
14793 p = strlen (pattern);
14795 gcc_assert (REG_P (operands[1]));
14797 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
14798 for (i = 1; i < XVECLEN (operands[2], 0); i++)
14800 p += sprintf (&pattern[p], ", d%d", base + i);
14802 strcpy (&pattern[p], "}");
14804 output_asm_insn (pattern, operands);
14805 return "";
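/* For example (registers invented), pushing d8-d10 produces
   "fstmfdd sp!, {d8, d9, d10}": the %P1 operand prints the first
   D register and the loop appends the rest by number.  */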
14809 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
14810 number of bytes pushed. */
14812 static int
14813 vfp_emit_fstmd (int base_reg, int count)
14815 rtx par;
14816 rtx dwarf;
14817 rtx tmp, reg;
14818 int i;
14820 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
14821 register pairs are stored by a store multiple insn. We avoid this
14822 by pushing an extra pair. */
14823 if (count == 2 && !arm_arch6)
14825 if (base_reg == LAST_VFP_REGNUM - 3)
14826 base_reg -= 2;
14827 count++;
14830 /* FSTMD may not store more than 16 doubleword registers at once. Split
14831 larger stores into multiple parts (up to a maximum of two, in
14832 practice). */
14833 if (count > 16)
14835 int saved;
14836 /* NOTE: base_reg is an internal register number, so each D register
14837 counts as 2. */
14838 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
14839 saved += vfp_emit_fstmd (base_reg, 16);
14840 return saved;
14843 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14844 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14846 reg = gen_rtx_REG (DFmode, base_reg);
14847 base_reg += 2;
14849 XVECEXP (par, 0, 0)
14850 = gen_rtx_SET (VOIDmode,
14851 gen_frame_mem
14852 (BLKmode,
14853 gen_rtx_PRE_MODIFY (Pmode,
14854 stack_pointer_rtx,
14855 plus_constant
14856 (Pmode, stack_pointer_rtx,
14857 - (count * 8)))
14859 gen_rtx_UNSPEC (BLKmode,
14860 gen_rtvec (1, reg),
14861 UNSPEC_PUSH_MULT));
14863 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14864 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
14865 RTX_FRAME_RELATED_P (tmp) = 1;
14866 XVECEXP (dwarf, 0, 0) = tmp;
14868 tmp = gen_rtx_SET (VOIDmode,
14869 gen_frame_mem (DFmode, stack_pointer_rtx),
14870 reg);
14871 RTX_FRAME_RELATED_P (tmp) = 1;
14872 XVECEXP (dwarf, 0, 1) = tmp;
14874 for (i = 1; i < count; i++)
14876 reg = gen_rtx_REG (DFmode, base_reg);
14877 base_reg += 2;
14878 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14880 tmp = gen_rtx_SET (VOIDmode,
14881 gen_frame_mem (DFmode,
14882 plus_constant (Pmode,
14883 stack_pointer_rtx,
14884 i * 8)),
14885 reg);
14886 RTX_FRAME_RELATED_P (tmp) = 1;
14887 XVECEXP (dwarf, 0, i + 1) = tmp;
14890 par = emit_insn (par);
14891 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14892 RTX_FRAME_RELATED_P (par) = 1;
14894 return count * 8;
14897 /* Emit a call instruction with pattern PAT. ADDR is the address of
14898 the call target. */
14900 void
14901 arm_emit_call_insn (rtx pat, rtx addr)
14903 rtx insn;
14905 insn = emit_call_insn (pat);
14907 /* The PIC register is live on entry to VxWorks PIC PLT entries.
14908 If the call might use such an entry, add a use of the PIC register
14909 to the instruction's CALL_INSN_FUNCTION_USAGE. */
14910 if (TARGET_VXWORKS_RTP
14911 && flag_pic
14912 && GET_CODE (addr) == SYMBOL_REF
14913 && (SYMBOL_REF_DECL (addr)
14914 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
14915 : !SYMBOL_REF_LOCAL_P (addr)))
14917 require_pic_register ();
14918 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
14922 /* Output a 'call' insn. */
14923 const char *
14924 output_call (rtx *operands)
14926 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
14928 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
14929 if (REGNO (operands[0]) == LR_REGNUM)
14931 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
14932 output_asm_insn ("mov%?\t%0, %|lr", operands);
14935 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14937 if (TARGET_INTERWORK || arm_arch4t)
14938 output_asm_insn ("bx%?\t%0", operands);
14939 else
14940 output_asm_insn ("mov%?\t%|pc, %0", operands);
14942 return "";
14945 /* Output a 'call' insn that is a reference in memory. This is
14946 disabled for ARMv5, where we prefer a blx instead, because otherwise
14947 there's a significant performance overhead. */
14948 const char *
14949 output_call_mem (rtx *operands)
14951 gcc_assert (!arm_arch5);
14952 if (TARGET_INTERWORK)
14954 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14955 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14956 output_asm_insn ("bx%?\t%|ip", operands);
14958 else if (regno_use_in (LR_REGNUM, operands[0]))
14960 /* LR is used in the memory address. We load the address in the
14961 first instruction. It's safe to use IP as the target of the
14962 load since the call will kill it anyway. */
14963 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14964 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14965 if (arm_arch4t)
14966 output_asm_insn ("bx%?\t%|ip", operands);
14967 else
14968 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
14970 else
14972 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14973 output_asm_insn ("ldr%?\t%|pc, %0", operands);
14976 return "";
14980 /* Output a move from ARM registers to ARM registers of a long double.
14981 OPERANDS[0] is the destination.
14982 OPERANDS[1] is the source. */
14983 const char *
14984 output_mov_long_double_arm_from_arm (rtx *operands)
14986 /* We have to be careful here because the two might overlap. */
14987 int dest_start = REGNO (operands[0]);
14988 int src_start = REGNO (operands[1]);
14989 rtx ops[2];
14990 int i;
14992 if (dest_start < src_start)
14994 for (i = 0; i < 3; i++)
14996 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14997 ops[1] = gen_rtx_REG (SImode, src_start + i);
14998 output_asm_insn ("mov%?\t%0, %1", ops);
15001 else
15003 for (i = 2; i >= 0; i--)
15005 ops[0] = gen_rtx_REG (SImode, dest_start + i);
15006 ops[1] = gen_rtx_REG (SImode, src_start + i);
15007 output_asm_insn ("mov%?\t%0, %1", ops);
15011 return "";
15014 void
15015 arm_emit_movpair (rtx dest, rtx src)
15017 /* If the src is an immediate, simplify it. */
15018 if (CONST_INT_P (src))
15020 HOST_WIDE_INT val = INTVAL (src);
15021 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
15022 if ((val >> 16) & 0x0000ffff)
15023 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
15024 GEN_INT (16)),
15025 GEN_INT ((val >> 16) & 0x0000ffff));
15026 return;
15028 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
15029 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
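/* A minimal sketch of the immediate path above (value invented): for
   0x12345678 the first set moves the low half and the zero_extract set
   patches in the high half, which typically end up as
   "movw rN, #0x5678" followed by "movt rN, #0x1234"; constants with a
   zero top half only need the first insn.  */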
15032 /* Output a move between double words. It must be REG<-MEM
15033 or MEM<-REG. */
15034 const char *
15035 output_move_double (rtx *operands, bool emit, int *count)
15037 enum rtx_code code0 = GET_CODE (operands[0]);
15038 enum rtx_code code1 = GET_CODE (operands[1]);
15039 rtx otherops[3];
15040 if (count)
15041 *count = 1;
15043 /* The only case when this might happen is when
15044 you are looking at the length of a DImode instruction
15045 that has an invalid constant in it. */
15046 if (code0 == REG && code1 != MEM)
15048 gcc_assert (!emit);
15049 *count = 2;
15050 return "";
15053 if (code0 == REG)
15055 unsigned int reg0 = REGNO (operands[0]);
15057 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
15059 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
15061 switch (GET_CODE (XEXP (operands[1], 0)))
15063 case REG:
15065 if (emit)
15067 if (TARGET_LDRD
15068 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
15069 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
15070 else
15071 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
15073 break;
15075 case PRE_INC:
15076 gcc_assert (TARGET_LDRD);
15077 if (emit)
15078 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
15079 break;
15081 case PRE_DEC:
15082 if (emit)
15084 if (TARGET_LDRD)
15085 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
15086 else
15087 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
15089 break;
15091 case POST_INC:
15092 if (emit)
15094 if (TARGET_LDRD)
15095 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
15096 else
15097 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
15099 break;
15101 case POST_DEC:
15102 gcc_assert (TARGET_LDRD);
15103 if (emit)
15104 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
15105 break;
15107 case PRE_MODIFY:
15108 case POST_MODIFY:
15109 /* Autoincrement addressing modes should never have overlapping
15110 base and destination registers, and overlapping index registers
15111 are already prohibited, so this doesn't need to worry about
15112 fix_cm3_ldrd. */
15113 otherops[0] = operands[0];
15114 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
15115 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
15117 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
15119 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
15121 /* Registers overlap so split out the increment. */
15122 if (emit)
15124 output_asm_insn ("add%?\t%1, %1, %2", otherops);
15125 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
15127 if (count)
15128 *count = 2;
15130 else
15132 /* Use a single insn if we can.
15133 FIXME: IWMMXT allows offsets larger than ldrd can
15134 handle, fix these up with a pair of ldr. */
15135 if (TARGET_THUMB2
15136 || !CONST_INT_P (otherops[2])
15137 || (INTVAL (otherops[2]) > -256
15138 && INTVAL (otherops[2]) < 256))
15140 if (emit)
15141 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
15143 else
15145 if (emit)
15147 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
15148 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
15150 if (count)
15151 *count = 2;
15156 else
15158 /* Use a single insn if we can.
15159 FIXME: IWMMXT allows offsets larger than ldrd can handle,
15160 fix these up with a pair of ldr. */
15161 if (TARGET_THUMB2
15162 || !CONST_INT_P (otherops[2])
15163 || (INTVAL (otherops[2]) > -256
15164 && INTVAL (otherops[2]) < 256))
15166 if (emit)
15167 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
15169 else
15171 if (emit)
15173 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
15174 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
15176 if (count)
15177 *count = 2;
15180 break;
15182 case LABEL_REF:
15183 case CONST:
15184 /* We might be able to use ldrd %0, %1 here. However the range is
15185 different to ldr/adr, and it is broken on some ARMv7-M
15186 implementations. */
15187 /* Use the second register of the pair to avoid problematic
15188 overlap. */
15189 otherops[1] = operands[1];
15190 if (emit)
15191 output_asm_insn ("adr%?\t%0, %1", otherops);
15192 operands[1] = otherops[0];
15193 if (emit)
15195 if (TARGET_LDRD)
15196 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
15197 else
15198 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
15201 if (count)
15202 *count = 2;
15203 break;
15205 /* ??? This needs checking for thumb2. */
15206 default:
15207 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
15208 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
15210 otherops[0] = operands[0];
15211 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
15212 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
15214 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
15216 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
15218 switch ((int) INTVAL (otherops[2]))
15220 case -8:
15221 if (emit)
15222 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
15223 return "";
15224 case -4:
15225 if (TARGET_THUMB2)
15226 break;
15227 if (emit)
15228 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
15229 return "";
15230 case 4:
15231 if (TARGET_THUMB2)
15232 break;
15233 if (emit)
15234 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
15235 return "";
15238 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
15239 operands[1] = otherops[0];
15240 if (TARGET_LDRD
15241 && (REG_P (otherops[2])
15242 || TARGET_THUMB2
15243 || (CONST_INT_P (otherops[2])
15244 && INTVAL (otherops[2]) > -256
15245 && INTVAL (otherops[2]) < 256)))
15247 if (reg_overlap_mentioned_p (operands[0],
15248 otherops[2]))
15250 rtx tmp;
15251 /* Swap base and index registers over to
15252 avoid a conflict. */
15253 tmp = otherops[1];
15254 otherops[1] = otherops[2];
15255 otherops[2] = tmp;
15257 /* If both registers conflict, it will usually
15258 have been fixed by a splitter. */
15259 if (reg_overlap_mentioned_p (operands[0], otherops[2])
15260 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
15262 if (emit)
15264 output_asm_insn ("add%?\t%0, %1, %2", otherops);
15265 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
15267 if (count)
15268 *count = 2;
15270 else
15272 otherops[0] = operands[0];
15273 if (emit)
15274 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
15276 return "";
15279 if (CONST_INT_P (otherops[2]))
15281 if (emit)
15283 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
15284 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
15285 else
15286 output_asm_insn ("add%?\t%0, %1, %2", otherops);
15289 else
15291 if (emit)
15292 output_asm_insn ("add%?\t%0, %1, %2", otherops);
15295 else
15297 if (emit)
15298 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
15301 if (count)
15302 *count = 2;
15304 if (TARGET_LDRD)
15305 return "ldr%(d%)\t%0, [%1]";
15307 return "ldm%(ia%)\t%1, %M0";
15309 else
15311 otherops[1] = adjust_address (operands[1], SImode, 4);
15312 /* Take care of overlapping base/data reg. */
15313 if (reg_mentioned_p (operands[0], operands[1]))
15315 if (emit)
15317 output_asm_insn ("ldr%?\t%0, %1", otherops);
15318 output_asm_insn ("ldr%?\t%0, %1", operands);
15320 if (count)
15321 *count = 2;
15324 else
15326 if (emit)
15328 output_asm_insn ("ldr%?\t%0, %1", operands);
15329 output_asm_insn ("ldr%?\t%0, %1", otherops);
15331 if (count)
15332 *count = 2;
15337 else
15339 /* Constraints should ensure this. */
15340 gcc_assert (code0 == MEM && code1 == REG);
15341 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
15342 || (TARGET_ARM && TARGET_LDRD));
15344 switch (GET_CODE (XEXP (operands[0], 0)))
15346 case REG:
15347 if (emit)
15349 if (TARGET_LDRD)
15350 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
15351 else
15352 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
15354 break;
15356 case PRE_INC:
15357 gcc_assert (TARGET_LDRD);
15358 if (emit)
15359 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
15360 break;
15362 case PRE_DEC:
15363 if (emit)
15365 if (TARGET_LDRD)
15366 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
15367 else
15368 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
15370 break;
15372 case POST_INC:
15373 if (emit)
15375 if (TARGET_LDRD)
15376 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
15377 else
15378 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
15380 break;
15382 case POST_DEC:
15383 gcc_assert (TARGET_LDRD);
15384 if (emit)
15385 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
15386 break;
15388 case PRE_MODIFY:
15389 case POST_MODIFY:
15390 otherops[0] = operands[1];
15391 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
15392 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
15394 /* IWMMXT allows offsets larger than ldrd can handle,
15395 fix these up with a pair of ldr. */
15396 if (!TARGET_THUMB2
15397 && CONST_INT_P (otherops[2])
15398 && (INTVAL(otherops[2]) <= -256
15399 || INTVAL(otherops[2]) >= 256))
15401 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
15403 if (emit)
15405 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
15406 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
15408 if (count)
15409 *count = 2;
15411 else
15413 if (emit)
15415 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
15416 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
15418 if (count)
15419 *count = 2;
15422 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
15424 if (emit)
15425 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
15427 else
15429 if (emit)
15430 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
15432 break;
15434 case PLUS:
15435 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
15436 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
15438 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
15440 case -8:
15441 if (emit)
15442 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
15443 return "";
15445 case -4:
15446 if (TARGET_THUMB2)
15447 break;
15448 if (emit)
15449 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
15450 return "";
15452 case 4:
15453 if (TARGET_THUMB2)
15454 break;
15455 if (emit)
15456 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
15457 return "";
15460 if (TARGET_LDRD
15461 && (REG_P (otherops[2])
15462 || TARGET_THUMB2
15463 || (CONST_INT_P (otherops[2])
15464 && INTVAL (otherops[2]) > -256
15465 && INTVAL (otherops[2]) < 256)))
15467 otherops[0] = operands[1];
15468 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
15469 if (emit)
15470 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
15471 return "";
15473 /* Fall through */
15475 default:
15476 otherops[0] = adjust_address (operands[0], SImode, 4);
15477 otherops[1] = operands[1];
15478 if (emit)
15480 output_asm_insn ("str%?\t%1, %0", operands);
15481 output_asm_insn ("str%?\t%H1, %0", otherops);
15483 if (count)
15484 *count = 2;
15488 return "";
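/* For illustration (register numbers and offsets here are arbitrary
   examples): storing a DImode pair {r2, r3} to (mem (plus (reg r1)
   (const_int 8))) on a TARGET_LDRD target takes the PLUS case above and
   emits a single STRD of the pair at [r1, #8].  Without LDRD, an offset of
   -8 falls back to the "stmdb" form addressing the same two words, and an
   out-of-range offset hits the default case, which emits two separate
   "str" instructions and sets *count to 2.  */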
15491 /* Output a move, load or store for quad-word vectors in ARM registers. Only
15492 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
15494 const char *
15495 output_move_quad (rtx *operands)
15497 if (REG_P (operands[0]))
15499 /* Load, or reg->reg move. */
15501 if (MEM_P (operands[1]))
15503 switch (GET_CODE (XEXP (operands[1], 0)))
15505 case REG:
15506 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
15507 break;
15509 case LABEL_REF:
15510 case CONST:
15511 output_asm_insn ("adr%?\t%0, %1", operands);
15512 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
15513 break;
15515 default:
15516 gcc_unreachable ();
15519 else
15521 rtx ops[2];
15522 int dest, src, i;
15524 gcc_assert (REG_P (operands[1]));
15526 dest = REGNO (operands[0]);
15527 src = REGNO (operands[1]);
15529 /* This seems pretty dumb, but hopefully GCC won't try to do it
15530 very often. */
15531 if (dest < src)
15532 for (i = 0; i < 4; i++)
15534 ops[0] = gen_rtx_REG (SImode, dest + i);
15535 ops[1] = gen_rtx_REG (SImode, src + i);
15536 output_asm_insn ("mov%?\t%0, %1", ops);
15538 else
15539 for (i = 3; i >= 0; i--)
15541 ops[0] = gen_rtx_REG (SImode, dest + i);
15542 ops[1] = gen_rtx_REG (SImode, src + i);
15543 output_asm_insn ("mov%?\t%0, %1", ops);
15547 else
15549 gcc_assert (MEM_P (operands[0]));
15550 gcc_assert (REG_P (operands[1]));
15551 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
15553 switch (GET_CODE (XEXP (operands[0], 0)))
15555 case REG:
15556 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
15557 break;
15559 default:
15560 gcc_unreachable ();
15564 return "";
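/* For illustration (register numbers are arbitrary): a core-register
   reg->reg move of a quad value from {r2-r5} into {r0-r3} has dest < src,
   so the ascending loop above emits
       mov r0, r2 ; mov r1, r3 ; mov r2, r4 ; mov r3, r5
   and each source register is read before a later iteration overwrites it;
   the descending loop handles the opposite direction of overlap the same
   way.  */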
15567 /* Output a VFP load or store instruction. */
15569 const char *
15570 output_move_vfp (rtx *operands)
15572 rtx reg, mem, addr, ops[2];
15573 int load = REG_P (operands[0]);
15574 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
15575 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
15576 const char *templ;
15577 char buff[50];
15578 enum machine_mode mode;
15580 reg = operands[!load];
15581 mem = operands[load];
15583 mode = GET_MODE (reg);
15585 gcc_assert (REG_P (reg));
15586 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
15587 gcc_assert (mode == SFmode
15588 || mode == DFmode
15589 || mode == SImode
15590 || mode == DImode
15591 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
15592 gcc_assert (MEM_P (mem));
15594 addr = XEXP (mem, 0);
15596 switch (GET_CODE (addr))
15598 case PRE_DEC:
15599 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
15600 ops[0] = XEXP (addr, 0);
15601 ops[1] = reg;
15602 break;
15604 case POST_INC:
15605 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
15606 ops[0] = XEXP (addr, 0);
15607 ops[1] = reg;
15608 break;
15610 default:
15611 templ = "f%s%c%%?\t%%%s0, %%1%s";
15612 ops[0] = reg;
15613 ops[1] = mem;
15614 break;
15617 sprintf (buff, templ,
15618 load ? "ld" : "st",
15619 dp ? 'd' : 's',
15620 dp ? "P" : "",
15621 integer_p ? "\t%@ int" : "");
15622 output_asm_insn (buff, ops);
15624 return "";
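/* For illustration, two expansions of the templates above: a DFmode load
   from a plain register address takes the default case, so the sprintf
   builds
       "fldd%?\t%P0, %1"
   while an SFmode store with POST_INC addressing builds
       "fstmias%?\t%0!, {%1}"
   The trailing "\t%@ int" comment is appended only when an integer mode is
   being moved through a VFP register.  */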
15627 /* Output a Neon double-word or quad-word load or store, or a load
15628 or store for larger structure modes.
15630 WARNING: The ordering of elements is weird in big-endian mode,
15631 because the EABI requires that vectors stored in memory appear
15632 as though they were stored by a VSTM instruction.
15633 GCC RTL defines element ordering based on in-memory order.
15634 This can be different from the architectural ordering of elements
15635 within a NEON register. The intrinsics defined in arm_neon.h use the
15636 NEON register element ordering, not the GCC RTL element ordering.
15638 For example, the in-memory ordering of a big-endian quadword
15639 vector with 16-bit elements when stored from register pair {d0,d1}
15640 will be (lowest address first, d0[N] is NEON register element N):
15642 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
15644 When necessary, quadword registers (dN, dN+1) are moved to ARM
15645 registers from rN in the order:
15647 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
15649 So that STM/LDM can be used on vectors in ARM registers, and the
15650 same memory layout will result as if VSTM/VLDM were used.
15652 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
15653 possible, which allows use of appropriate alignment tags.
15654 Note that the choice of "64" is independent of the actual vector
15655 element size; this size simply ensures that the behavior is
15656 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
15658 Due to limitations of those instructions, use of VST1.64/VLD1.64
15659 is not possible if:
15660 - the address contains PRE_DEC, or
15661 - the mode refers to more than 4 double-word registers
15663 In those cases, it would be possible to replace VSTM/VLDM by a
15664 sequence of instructions; this is not currently implemented since
15665 this is not certain to actually improve performance. */
15667 const char *
15668 output_move_neon (rtx *operands)
15670 rtx reg, mem, addr, ops[2];
15671 int regno, nregs, load = REG_P (operands[0]);
15672 const char *templ;
15673 char buff[50];
15674 enum machine_mode mode;
15676 reg = operands[!load];
15677 mem = operands[load];
15679 mode = GET_MODE (reg);
15681 gcc_assert (REG_P (reg));
15682 regno = REGNO (reg);
15683 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
15684 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
15685 || NEON_REGNO_OK_FOR_QUAD (regno));
15686 gcc_assert (VALID_NEON_DREG_MODE (mode)
15687 || VALID_NEON_QREG_MODE (mode)
15688 || VALID_NEON_STRUCT_MODE (mode));
15689 gcc_assert (MEM_P (mem));
15691 addr = XEXP (mem, 0);
15693 /* Strip off const from addresses like (const (plus (...))). */
15694 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
15695 addr = XEXP (addr, 0);
15697 switch (GET_CODE (addr))
15699 case POST_INC:
15700 /* We have to use vldm / vstm for too-large modes. */
15701 if (nregs > 4)
15703 templ = "v%smia%%?\t%%0!, %%h1";
15704 ops[0] = XEXP (addr, 0);
15706 else
15708 templ = "v%s1.64\t%%h1, %%A0";
15709 ops[0] = mem;
15711 ops[1] = reg;
15712 break;
15714 case PRE_DEC:
15715 /* We have to use vldm / vstm in this case, since there is no
15716 pre-decrement form of the vld1 / vst1 instructions. */
15717 templ = "v%smdb%%?\t%%0!, %%h1";
15718 ops[0] = XEXP (addr, 0);
15719 ops[1] = reg;
15720 break;
15722 case POST_MODIFY:
15723 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
15724 gcc_unreachable ();
15726 case LABEL_REF:
15727 case PLUS:
15729 int i;
15730 int overlap = -1;
15731 for (i = 0; i < nregs; i++)
15733 /* We're only using DImode here because it's a convenient size. */
15734 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
15735 ops[1] = adjust_address (mem, DImode, 8 * i);
15736 if (reg_overlap_mentioned_p (ops[0], mem))
15738 gcc_assert (overlap == -1);
15739 overlap = i;
15741 else
15743 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
15744 output_asm_insn (buff, ops);
15747 if (overlap != -1)
15749 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
15750 ops[1] = adjust_address (mem, SImode, 8 * overlap);
15751 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
15752 output_asm_insn (buff, ops);
15755 return "";
15758 default:
15759 /* We have to use vldm / vstm for too-large modes. */
15760 if (nregs > 4)
15761 templ = "v%smia%%?\t%%m0, %%h1";
15762 else
15763 templ = "v%s1.64\t%%h1, %%A0";
15765 ops[0] = mem;
15766 ops[1] = reg;
15769 sprintf (buff, templ, load ? "ld" : "st");
15770 output_asm_insn (buff, ops);
15772 return "";
15775 /* Compute and return the length of neon_mov<mode>, where <mode> is
15776 one of VSTRUCT modes: EI, OI, CI or XI. */
15778 arm_attr_length_move_neon (rtx insn)
15780 rtx reg, mem, addr;
15781 int load;
15782 enum machine_mode mode;
15784 extract_insn_cached (insn);
15786 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
15788 mode = GET_MODE (recog_data.operand[0]);
15789 switch (mode)
15791 case EImode:
15792 case OImode:
15793 return 8;
15794 case CImode:
15795 return 12;
15796 case XImode:
15797 return 16;
15798 default:
15799 gcc_unreachable ();
15803 load = REG_P (recog_data.operand[0]);
15804 reg = recog_data.operand[!load];
15805 mem = recog_data.operand[load];
15807 gcc_assert (MEM_P (mem));
15809 mode = GET_MODE (reg);
15810 addr = XEXP (mem, 0);
15812 /* Strip off const from addresses like (const (plus (...))). */
15813 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
15814 addr = XEXP (addr, 0);
15816 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
15818 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
15819 return insns * 4;
15821 else
15822 return 4;
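/* Worked example: a CImode value occupies six D registers, so a load from a
   (plus (reg) (const_int)) address goes through the PLUS case of
   output_move_neon above, which emits one vldr per D register; the length
   returned here is then 6 * 4 = 24 bytes.  A plain register address takes
   the single-instruction path and returns 4.  */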
15825 /* Return nonzero if the offset in the address is an immediate. Otherwise,
15826 return zero. */
15829 arm_address_offset_is_imm (rtx insn)
15831 rtx mem, addr;
15833 extract_insn_cached (insn);
15835 if (REG_P (recog_data.operand[0]))
15836 return 0;
15838 mem = recog_data.operand[0];
15840 gcc_assert (MEM_P (mem));
15842 addr = XEXP (mem, 0);
15844 if (REG_P (addr)
15845 || (GET_CODE (addr) == PLUS
15846 && REG_P (XEXP (addr, 0))
15847 && CONST_INT_P (XEXP (addr, 1))))
15848 return 1;
15849 else
15850 return 0;
15853 /* Output an ADD r, s, #n where n may be too big for one instruction.
15854 If n is zero and the two registers are the same, output nothing. */
15855 const char *
15856 output_add_immediate (rtx *operands)
15858 HOST_WIDE_INT n = INTVAL (operands[2]);
15860 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
15862 if (n < 0)
15863 output_multi_immediate (operands,
15864 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
15865 -n);
15866 else
15867 output_multi_immediate (operands,
15868 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
15872 return "";
15875 /* Output a multiple immediate operation.
15876 OPERANDS is the vector of operands referred to in the output patterns.
15877 INSTR1 is the output pattern to use for the first constant.
15878 INSTR2 is the output pattern to use for subsequent constants.
15879 IMMED_OP is the index of the constant slot in OPERANDS.
15880 N is the constant value. */
15881 static const char *
15882 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
15883 int immed_op, HOST_WIDE_INT n)
15885 #if HOST_BITS_PER_WIDE_INT > 32
15886 n &= 0xffffffff;
15887 #endif
15889 if (n == 0)
15891 /* Quick and easy output. */
15892 operands[immed_op] = const0_rtx;
15893 output_asm_insn (instr1, operands);
15895 else
15897 int i;
15898 const char * instr = instr1;
15900 /* Note that n is never zero here (which would give no output). */
15901 for (i = 0; i < 32; i += 2)
15903 if (n & (3 << i))
15905 operands[immed_op] = GEN_INT (n & (255 << i));
15906 output_asm_insn (instr, operands);
15907 instr = instr2;
15908 i += 6;
15913 return "";
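/* Worked example (operand registers are arbitrary): the constant 0x10004 is
   not a valid single immediate, so output_add_immediate splits it here into
   8-bit chunks at even bit positions and emits
       add r0, r1, #4
       add r0, r0, #65536
   Subtracting the same constant would use the "sub" patterns instead.  */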
15916 /* Return the name of a shifter operation. */
15917 static const char *
15918 arm_shift_nmem(enum rtx_code code)
15920 switch (code)
15922 case ASHIFT:
15923 return ARM_LSL_NAME;
15925 case ASHIFTRT:
15926 return "asr";
15928 case LSHIFTRT:
15929 return "lsr";
15931 case ROTATERT:
15932 return "ror";
15934 default:
15935 abort();
15939 /* Return the appropriate ARM instruction for the operation code.
15940 The returned result should not be overwritten. OP is the rtx of the
15941 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
15942 was shifted. */
15943 const char *
15944 arithmetic_instr (rtx op, int shift_first_arg)
15946 switch (GET_CODE (op))
15948 case PLUS:
15949 return "add";
15951 case MINUS:
15952 return shift_first_arg ? "rsb" : "sub";
15954 case IOR:
15955 return "orr";
15957 case XOR:
15958 return "eor";
15960 case AND:
15961 return "and";
15963 case ASHIFT:
15964 case ASHIFTRT:
15965 case LSHIFTRT:
15966 case ROTATERT:
15967 return arm_shift_nmem(GET_CODE(op));
15969 default:
15970 gcc_unreachable ();
15974 /* Ensure valid constant shifts and return the appropriate shift mnemonic
15975 for the operation code. The returned result should not be overwritten.
15976 OP is the rtx code of the shift.
15977 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
15978 constant amount if the shift is by a constant. */
15979 static const char *
15980 shift_op (rtx op, HOST_WIDE_INT *amountp)
15982 const char * mnem;
15983 enum rtx_code code = GET_CODE (op);
15985 switch (code)
15987 case ROTATE:
15988 if (!CONST_INT_P (XEXP (op, 1)))
15990 output_operand_lossage ("invalid shift operand");
15991 return NULL;
15994 code = ROTATERT;
15995 *amountp = 32 - INTVAL (XEXP (op, 1));
15996 mnem = "ror";
15997 break;
15999 case ASHIFT:
16000 case ASHIFTRT:
16001 case LSHIFTRT:
16002 case ROTATERT:
16003 mnem = arm_shift_nmem(code);
16004 if (CONST_INT_P (XEXP (op, 1)))
16006 *amountp = INTVAL (XEXP (op, 1));
16008 else if (REG_P (XEXP (op, 1)))
16010 *amountp = -1;
16011 return mnem;
16013 else
16015 output_operand_lossage ("invalid shift operand");
16016 return NULL;
16018 break;
16020 case MULT:
16021 /* We never have to worry about the amount being other than a
16022 power of 2, since this case can never be reloaded from a reg. */
16023 if (!CONST_INT_P (XEXP (op, 1)))
16025 output_operand_lossage ("invalid shift operand");
16026 return NULL;
16029 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
16031 /* Amount must be a power of two. */
16032 if (*amountp & (*amountp - 1))
16034 output_operand_lossage ("invalid shift operand");
16035 return NULL;
16038 *amountp = int_log2 (*amountp);
16039 return ARM_LSL_NAME;
16041 default:
16042 output_operand_lossage ("invalid shift operand");
16043 return NULL;
16046 /* This is not 100% correct, but follows from the desire to merge
16047 multiplication by a power of 2 with the recognizer for a
16048 shift. >=32 is not a valid shift for "lsl", so we must try and
16049 output a shift that produces the correct arithmetical result.
16050 Using lsr #32 is identical except for the fact that the carry bit
16051 is not set correctly if we set the flags; but we never use the
16052 carry bit from such an operation, so we can ignore that. */
16053 if (code == ROTATERT)
16054 /* Rotate is just modulo 32. */
16055 *amountp &= 31;
16056 else if (*amountp != (*amountp & 31))
16058 if (code == ASHIFT)
16059 mnem = "lsr";
16060 *amountp = 32;
16063 /* Shifts of 0 are no-ops. */
16064 if (*amountp == 0)
16065 return NULL;
16067 return mnem;
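/* Worked examples: (mult x (const_int 8)) is accepted as a shift and comes
   back as ARM_LSL_NAME with *amountp == 3; (rotate x (const_int 10)) is
   canonicalized to "ror" with *amountp == 22; and an out-of-range
   (ashift x (const_int 32)) is turned into "lsr" with *amountp == 32,
   which gives the arithmetically correct zero result as described above.  */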
16070 /* Obtain the shift from the POWER of two. */
16072 static HOST_WIDE_INT
16073 int_log2 (HOST_WIDE_INT power)
16075 HOST_WIDE_INT shift = 0;
16077 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
16079 gcc_assert (shift <= 31);
16080 shift++;
16083 return shift;
16086 /* Output a .ascii pseudo-op, keeping track of lengths. This is
16087 because /bin/as is horribly restrictive. The judgement about
16088 whether or not each character is 'printable' (and can be output as
16089 is) or not (and must be printed with an octal escape) must be made
16090 with reference to the *host* character set -- the situation is
16091 similar to that discussed in the comments above pp_c_char in
16092 c-pretty-print.c. */
16094 #define MAX_ASCII_LEN 51
16096 void
16097 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
16099 int i;
16100 int len_so_far = 0;
16102 fputs ("\t.ascii\t\"", stream);
16104 for (i = 0; i < len; i++)
16106 int c = p[i];
16108 if (len_so_far >= MAX_ASCII_LEN)
16110 fputs ("\"\n\t.ascii\t\"", stream);
16111 len_so_far = 0;
16114 if (ISPRINT (c))
16116 if (c == '\\' || c == '\"')
16118 putc ('\\', stream);
16119 len_so_far++;
16121 putc (c, stream);
16122 len_so_far++;
16124 else
16126 fprintf (stream, "\\%03o", c);
16127 len_so_far += 4;
16131 fputs ("\"\n", stream);
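/* Worked example: for the four bytes { 'a', '"', '\n', 0 } the loop above
   emits
       .ascii "a\"\012\000"
   The quote is printable, so it only gains a backslash, while the newline
   and the NUL are not printable on the host and are written as three-digit
   octal escapes.  */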
16134 /* Compute the register save mask for registers 0 through 12
16135 inclusive. This code is used by arm_compute_save_reg_mask. */
16137 static unsigned long
16138 arm_compute_save_reg0_reg12_mask (void)
16140 unsigned long func_type = arm_current_func_type ();
16141 unsigned long save_reg_mask = 0;
16142 unsigned int reg;
16144 if (IS_INTERRUPT (func_type))
16146 unsigned int max_reg;
16147 /* Interrupt functions must not corrupt any registers,
16148 even call clobbered ones. If this is a leaf function
16149 we can just examine the registers used by the RTL, but
16150 otherwise we have to assume that whatever function is
16151 called might clobber anything, and so we have to save
16152 all the call-clobbered registers as well. */
16153 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
16154 /* FIQ handlers have registers r8 - r12 banked, so
16155 we only need to check r0 - r7.  Normal ISRs only
16156 bank r14 and r15, so we must check up to r12.
16157 r13 is the stack pointer which is always preserved,
16158 so we do not need to consider it here. */
16159 max_reg = 7;
16160 else
16161 max_reg = 12;
16163 for (reg = 0; reg <= max_reg; reg++)
16164 if (df_regs_ever_live_p (reg)
16165 || (! crtl->is_leaf && call_used_regs[reg]))
16166 save_reg_mask |= (1 << reg);
16168 /* Also save the pic base register if necessary. */
16169 if (flag_pic
16170 && !TARGET_SINGLE_PIC_BASE
16171 && arm_pic_register != INVALID_REGNUM
16172 && crtl->uses_pic_offset_table)
16173 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
16175 else if (IS_VOLATILE(func_type))
16177 /* For noreturn functions we historically omitted register saves
16178 altogether. However this really messes up debugging. As a
16179 compromise save just the frame pointers. Combined with the link
16180 register saved elsewhere this should be sufficient to get
16181 a backtrace. */
16182 if (frame_pointer_needed)
16183 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
16184 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
16185 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
16186 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
16187 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
16189 else
16191 /* In the normal case we only need to save those registers
16192 which are call saved and which are used by this function. */
16193 for (reg = 0; reg <= 11; reg++)
16194 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16195 save_reg_mask |= (1 << reg);
16197 /* Handle the frame pointer as a special case. */
16198 if (frame_pointer_needed)
16199 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
16201 /* If we aren't loading the PIC register,
16202 don't stack it even though it may be live. */
16203 if (flag_pic
16204 && !TARGET_SINGLE_PIC_BASE
16205 && arm_pic_register != INVALID_REGNUM
16206 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
16207 || crtl->uses_pic_offset_table))
16208 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
16210 /* The prologue will copy SP into R0, so save it. */
16211 if (IS_STACKALIGN (func_type))
16212 save_reg_mask |= 1;
16215 /* Save registers so the exception handler can modify them. */
16216 if (crtl->calls_eh_return)
16218 unsigned int i;
16220 for (i = 0; ; i++)
16222 reg = EH_RETURN_DATA_REGNO (i);
16223 if (reg == INVALID_REGNUM)
16224 break;
16225 save_reg_mask |= 1 << reg;
16229 return save_reg_mask;
16233 /* Compute the number of bytes used to store the static chain register on the
16234 stack, above the stack frame. We need to know this accurately to get the
16235 alignment of the rest of the stack frame correct. */
16237 static int arm_compute_static_chain_stack_bytes (void)
16239 unsigned long func_type = arm_current_func_type ();
16240 int static_chain_stack_bytes = 0;
16242 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
16243 IS_NESTED (func_type) &&
16244 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
16245 static_chain_stack_bytes = 4;
16247 return static_chain_stack_bytes;
16251 /* Compute a bit mask of which registers need to be
16252 saved on the stack for the current function.
16253 This is used by arm_get_frame_offsets, which may add extra registers. */
16255 static unsigned long
16256 arm_compute_save_reg_mask (void)
16258 unsigned int save_reg_mask = 0;
16259 unsigned long func_type = arm_current_func_type ();
16260 unsigned int reg;
16262 if (IS_NAKED (func_type))
16263 /* This should never really happen. */
16264 return 0;
16266 /* If we are creating a stack frame, then we must save the frame pointer,
16267 IP (which will hold the old stack pointer), LR and the PC. */
16268 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16269 save_reg_mask |=
16270 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
16271 | (1 << IP_REGNUM)
16272 | (1 << LR_REGNUM)
16273 | (1 << PC_REGNUM);
16275 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
16277 /* Decide if we need to save the link register.
16278 Interrupt routines have their own banked link register,
16279 so they never need to save it.
16280 Otherwise if we do not use the link register we do not need to save
16281 it. If we are pushing other registers onto the stack however, we
16282 can save an instruction in the epilogue by pushing the link register
16283 now and then popping it back into the PC. This incurs extra memory
16284 accesses though, so we only do it when optimizing for size, and only
16285 if we know that we will not need a fancy return sequence. */
16286 if (df_regs_ever_live_p (LR_REGNUM)
16287 || (save_reg_mask
16288 && optimize_size
16289 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
16290 && !crtl->calls_eh_return))
16291 save_reg_mask |= 1 << LR_REGNUM;
16293 if (cfun->machine->lr_save_eliminated)
16294 save_reg_mask &= ~ (1 << LR_REGNUM);
16296 if (TARGET_REALLY_IWMMXT
16297 && ((bit_count (save_reg_mask)
16298 + ARM_NUM_INTS (crtl->args.pretend_args_size +
16299 arm_compute_static_chain_stack_bytes())
16300 ) % 2) != 0)
16302 /* The total number of registers that are going to be pushed
16303 onto the stack is odd. We need to ensure that the stack
16304 is 64-bit aligned before we start to save iWMMXt registers,
16305 and also before we start to create locals. (A local variable
16306 might be a double or long long which we will load/store using
16307 an iWMMXt instruction). Therefore we need to push another
16308 ARM register, so that the stack will be 64-bit aligned. We
16309 try to avoid using the arg registers (r0 -r3) as they might be
16310 used to pass values in a tail call. */
16311 for (reg = 4; reg <= 12; reg++)
16312 if ((save_reg_mask & (1 << reg)) == 0)
16313 break;
16315 if (reg <= 12)
16316 save_reg_mask |= (1 << reg);
16317 else
16319 cfun->machine->sibcall_blocked = 1;
16320 save_reg_mask |= (1 << 3);
16324 /* We may need to push an additional register for use initializing the
16325 PIC base register. */
16326 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
16327 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
16329 reg = thumb_find_work_register (1 << 4);
16330 if (!call_used_regs[reg])
16331 save_reg_mask |= (1 << reg);
16334 return save_reg_mask;
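/* Worked example (assuming an iWMMXt target): if the mask so far holds
   three core registers and there are no pretend args or static chain
   bytes, the number of words to be pushed is odd, so the loop above adds
   the first free register from r4 upwards -- usually r4 itself -- purely
   as padding so the stack stays 64-bit aligned for the iWMMXt saves and
   the locals.  */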
16338 /* Compute a bit mask of which registers need to be
16339 saved on the stack for the current function. */
16340 static unsigned long
16341 thumb1_compute_save_reg_mask (void)
16343 unsigned long mask;
16344 unsigned reg;
16346 mask = 0;
16347 for (reg = 0; reg < 12; reg ++)
16348 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16349 mask |= 1 << reg;
16351 if (flag_pic
16352 && !TARGET_SINGLE_PIC_BASE
16353 && arm_pic_register != INVALID_REGNUM
16354 && crtl->uses_pic_offset_table)
16355 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
16357 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
16358 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
16359 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
16361 /* LR will also be pushed if any lo regs are pushed. */
16362 if (mask & 0xff || thumb_force_lr_save ())
16363 mask |= (1 << LR_REGNUM);
16365 /* Make sure we have a low work register if we need one.
16366 We will need one if we are going to push a high register,
16367 but we are not currently intending to push a low register. */
16368 if ((mask & 0xff) == 0
16369 && ((mask & 0x0f00) || TARGET_BACKTRACE))
16371 /* Use thumb_find_work_register to choose which register
16372 we will use. If the register is live then we will
16373 have to push it. Use LAST_LO_REGNUM as our fallback
16374 choice for the register to select. */
16375 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
16376 /* Make sure the register returned by thumb_find_work_register is
16377 not part of the return value. */
16378 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
16379 reg = LAST_LO_REGNUM;
16381 if (! call_used_regs[reg])
16382 mask |= 1 << reg;
16385 /* The 504 below is 8 bytes less than 512 because there are two possible
16386 alignment words. We can't tell here if they will be present or not so we
16387 have to play it safe and assume that they are. */
16388 if ((CALLER_INTERWORKING_SLOT_SIZE +
16389 ROUND_UP_WORD (get_frame_size ()) +
16390 crtl->outgoing_args_size) >= 504)
16392 /* This is the same as the code in thumb1_expand_prologue() which
16393 determines which register to use for stack decrement. */
16394 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
16395 if (mask & (1 << reg))
16396 break;
16398 if (reg > LAST_LO_REGNUM)
16400 /* Make sure we have a register available for stack decrement. */
16401 mask |= 1 << LAST_LO_REGNUM;
16405 return mask;
16409 /* Return the number of bytes required to save VFP registers. */
16410 static int
16411 arm_get_vfp_saved_size (void)
16413 unsigned int regno;
16414 int count;
16415 int saved;
16417 saved = 0;
16418 /* Space for saved VFP registers. */
16419 if (TARGET_HARD_FLOAT && TARGET_VFP)
16421 count = 0;
16422 for (regno = FIRST_VFP_REGNUM;
16423 regno < LAST_VFP_REGNUM;
16424 regno += 2)
16426 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
16427 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
16429 if (count > 0)
16431 /* Workaround ARM10 VFPr1 bug. */
16432 if (count == 2 && !arm_arch6)
16433 count++;
16434 saved += count * 8;
16436 count = 0;
16438 else
16439 count++;
16441 if (count > 0)
16443 if (count == 2 && !arm_arch6)
16444 count++;
16445 saved += count * 8;
16448 return saved;
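/* Worked example: if only d8 and d9 need saving, the scan above ends a run
   with count == 2; on a pre-ARMv6 core the ARM10 VFPr1 workaround bumps
   the run to 3, so 24 bytes are reserved instead of the expected 16.  With
   arm_arch6 the same run costs 16 bytes.  */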
16452 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
16453 everything bar the final return instruction. If simple_return is true,
16454 then do not output epilogue, because it has already been emitted in RTL. */
16455 const char *
16456 output_return_instruction (rtx operand, bool really_return, bool reverse,
16457 bool simple_return)
16459 char conditional[10];
16460 char instr[100];
16461 unsigned reg;
16462 unsigned long live_regs_mask;
16463 unsigned long func_type;
16464 arm_stack_offsets *offsets;
16466 func_type = arm_current_func_type ();
16468 if (IS_NAKED (func_type))
16469 return "";
16471 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
16473 /* If this function was declared non-returning, and we have
16474 found a tail call, then we have to trust that the called
16475 function won't return. */
16476 if (really_return)
16478 rtx ops[2];
16480 /* Otherwise, trap an attempted return by aborting. */
16481 ops[0] = operand;
16482 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
16483 : "abort");
16484 assemble_external_libcall (ops[1]);
16485 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
16488 return "";
16491 gcc_assert (!cfun->calls_alloca || really_return);
16493 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
16495 cfun->machine->return_used_this_function = 1;
16497 offsets = arm_get_frame_offsets ();
16498 live_regs_mask = offsets->saved_regs_mask;
16500 if (!simple_return && live_regs_mask)
16502 const char * return_reg;
16504 /* If we do not have any special requirements for function exit
16505 (e.g. interworking) then we can load the return address
16506 directly into the PC. Otherwise we must load it into LR. */
16507 if (really_return
16508 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
16509 return_reg = reg_names[PC_REGNUM];
16510 else
16511 return_reg = reg_names[LR_REGNUM];
16513 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
16515 /* There are three possible reasons for the IP register
16516 being saved. 1) a stack frame was created, in which case
16517 IP contains the old stack pointer, or 2) an ISR routine
16518 corrupted it, or 3) it was saved to align the stack on
16519 iWMMXt. In case 1, restore IP into SP, otherwise just
16520 restore IP. */
16521 if (frame_pointer_needed)
16523 live_regs_mask &= ~ (1 << IP_REGNUM);
16524 live_regs_mask |= (1 << SP_REGNUM);
16526 else
16527 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
16530 /* On some ARM architectures it is faster to use LDR rather than
16531 LDM to load a single register. On other architectures, the
16532 cost is the same. In 26 bit mode, or for exception handlers,
16533 we have to use LDM to load the PC so that the CPSR is also
16534 restored. */
16535 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
16536 if (live_regs_mask == (1U << reg))
16537 break;
16539 if (reg <= LAST_ARM_REGNUM
16540 && (reg != LR_REGNUM
16541 || ! really_return
16542 || ! IS_INTERRUPT (func_type)))
16544 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
16545 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
16547 else
16549 char *p;
16550 int first = 1;
16552 /* Generate the load multiple instruction to restore the
16553 registers. Note we can get here, even if
16554 frame_pointer_needed is true, but only if sp already
16555 points to the base of the saved core registers. */
16556 if (live_regs_mask & (1 << SP_REGNUM))
16558 unsigned HOST_WIDE_INT stack_adjust;
16560 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
16561 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
16563 if (stack_adjust && arm_arch5 && TARGET_ARM)
16564 if (TARGET_UNIFIED_ASM)
16565 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
16566 else
16567 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
16568 else
16570 /* If we can't use ldmib (SA110 bug),
16571 then try to pop r3 instead. */
16572 if (stack_adjust)
16573 live_regs_mask |= 1 << 3;
16575 if (TARGET_UNIFIED_ASM)
16576 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
16577 else
16578 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
16581 else
16582 if (TARGET_UNIFIED_ASM)
16583 sprintf (instr, "pop%s\t{", conditional);
16584 else
16585 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
16587 p = instr + strlen (instr);
16589 for (reg = 0; reg <= SP_REGNUM; reg++)
16590 if (live_regs_mask & (1 << reg))
16592 int l = strlen (reg_names[reg]);
16594 if (first)
16595 first = 0;
16596 else
16598 memcpy (p, ", ", 2);
16599 p += 2;
16602 memcpy (p, "%|", 2);
16603 memcpy (p + 2, reg_names[reg], l);
16604 p += l + 2;
16607 if (live_regs_mask & (1 << LR_REGNUM))
16609 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
16610 /* If returning from an interrupt, restore the CPSR. */
16611 if (IS_INTERRUPT (func_type))
16612 strcat (p, "^");
16614 else
16615 strcpy (p, "}");
16618 output_asm_insn (instr, & operand);
16620 /* See if we need to generate an extra instruction to
16621 perform the actual function return. */
16622 if (really_return
16623 && func_type != ARM_FT_INTERWORKED
16624 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
16626 /* The return has already been handled
16627 by loading the LR into the PC. */
16628 return "";
16632 if (really_return)
16634 switch ((int) ARM_FUNC_TYPE (func_type))
16636 case ARM_FT_ISR:
16637 case ARM_FT_FIQ:
16638 /* ??? This is wrong for unified assembly syntax. */
16639 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
16640 break;
16642 case ARM_FT_INTERWORKED:
16643 sprintf (instr, "bx%s\t%%|lr", conditional);
16644 break;
16646 case ARM_FT_EXCEPTION:
16647 /* ??? This is wrong for unified assembly syntax. */
16648 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
16649 break;
16651 default:
16652 /* Use bx if it's available. */
16653 if (arm_arch5 || arm_arch4t)
16654 sprintf (instr, "bx%s\t%%|lr", conditional);
16655 else
16656 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
16657 break;
16660 output_asm_insn (instr, & operand);
16663 return "";
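/* For illustration (the register set is an arbitrary example): a normal
   ARM-mode function that saved { r4, r5, lr }, needs no interworking and
   is really returning can load the return address straight into the PC,
   so the code above prints roughly
       pop     {r4, r5, pc}
   under unified assembly (or "ldmfd sp!, {r4, r5, pc}" otherwise) and no
   separate return instruction is emitted.  */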
16666 /* Write the function name into the code section, directly preceding
16667 the function prologue.
16669 Code will be output similar to this:
16671 .ascii "arm_poke_function_name", 0
16672 .align
16674 .word 0xff000000 + (t1 - t0)
16675 arm_poke_function_name
16676 mov ip, sp
16677 stmfd sp!, {fp, ip, lr, pc}
16678 sub fp, ip, #4
16680 When performing a stack backtrace, code can inspect the value
16681 of 'pc' stored at 'fp' + 0. If the trace function then looks
16682 at location pc - 12 and the top 8 bits are set, then we know
16683 that there is a function name embedded immediately preceding this
16684 location, whose length is ((pc[-3]) & ~0xff000000).
16686 We assume that pc is declared as a pointer to an unsigned long.
16688 It is of no benefit to output the function name if we are assembling
16689 a leaf function. These function types will not contain a stack
16690 backtrace structure, therefore it is not possible to determine the
16691 function name. */
16692 void
16693 arm_poke_function_name (FILE *stream, const char *name)
16695 unsigned long alignlength;
16696 unsigned long length;
16697 rtx x;
16699 length = strlen (name) + 1;
16700 alignlength = ROUND_UP_WORD (length);
16702 ASM_OUTPUT_ASCII (stream, name, length);
16703 ASM_OUTPUT_ALIGN (stream, 2);
16704 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
16705 assemble_aligned_integer (UNITS_PER_WORD, x);
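/* Worked example: for the name "foo", length is strlen + 1 == 4 and
   ROUND_UP_WORD keeps it at 4, so the marker word emitted after the
   .ascii and .align directives is 0xff000000 + 4 == 0xff000004.  A
   backtracer recognizes it by the set top byte and uses the low bits to
   find the start of the string.  */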
16708 /* Place some comments into the assembler stream
16709 describing the current function. */
16710 static void
16711 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
16713 unsigned long func_type;
16715 /* ??? Do we want to print some of the below anyway? */
16716 if (TARGET_THUMB1)
16717 return;
16719 /* Sanity check. */
16720 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
16722 func_type = arm_current_func_type ();
16724 switch ((int) ARM_FUNC_TYPE (func_type))
16726 default:
16727 case ARM_FT_NORMAL:
16728 break;
16729 case ARM_FT_INTERWORKED:
16730 asm_fprintf (f, "\t%@ Function supports interworking.\n");
16731 break;
16732 case ARM_FT_ISR:
16733 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
16734 break;
16735 case ARM_FT_FIQ:
16736 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
16737 break;
16738 case ARM_FT_EXCEPTION:
16739 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
16740 break;
16743 if (IS_NAKED (func_type))
16744 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
16746 if (IS_VOLATILE (func_type))
16747 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
16749 if (IS_NESTED (func_type))
16750 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
16751 if (IS_STACKALIGN (func_type))
16752 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
16754 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
16755 crtl->args.size,
16756 crtl->args.pretend_args_size, frame_size);
16758 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
16759 frame_pointer_needed,
16760 cfun->machine->uses_anonymous_args);
16762 if (cfun->machine->lr_save_eliminated)
16763 asm_fprintf (f, "\t%@ link register save eliminated.\n");
16765 if (crtl->calls_eh_return)
16766 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
16770 static void
16771 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
16772 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
16774 arm_stack_offsets *offsets;
16776 if (TARGET_THUMB1)
16778 int regno;
16780 /* Emit any call-via-reg trampolines that are needed for v4t support
16781 of call_reg and call_value_reg type insns. */
16782 for (regno = 0; regno < LR_REGNUM; regno++)
16784 rtx label = cfun->machine->call_via[regno];
16786 if (label != NULL)
16788 switch_to_section (function_section (current_function_decl));
16789 targetm.asm_out.internal_label (asm_out_file, "L",
16790 CODE_LABEL_NUMBER (label));
16791 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
16795 /* ??? Probably not safe to set this here, since it assumes that a
16796 function will be emitted as assembly immediately after we generate
16797 RTL for it. This does not happen for inline functions. */
16798 cfun->machine->return_used_this_function = 0;
16800 else /* TARGET_32BIT */
16802 /* We need to take into account any stack-frame rounding. */
16803 offsets = arm_get_frame_offsets ();
16805 gcc_assert (!use_return_insn (FALSE, NULL)
16806 || (cfun->machine->return_used_this_function != 0)
16807 || offsets->saved_regs == offsets->outgoing_args
16808 || frame_pointer_needed);
16810 /* Reset the ARM-specific per-function variables. */
16811 after_arm_reorg = 0;
16815 /* Generate and emit a sequence of insns equivalent to PUSH, but using
16816 STR and STRD. If an even number of registers are being pushed, one
16817 or more STRD patterns are created for each register pair. If an
16818 odd number of registers are pushed, emit an initial STR followed by
16819 as many STRD instructions as are needed. This works best when the
16820 stack is initially 64-bit aligned (the normal case), since it
16821 ensures that each STRD is also 64-bit aligned. */
16822 static void
16823 thumb2_emit_strd_push (unsigned long saved_regs_mask)
16825 int num_regs = 0;
16826 int i;
16827 int regno;
16828 rtx par = NULL_RTX;
16829 rtx dwarf = NULL_RTX;
16830 rtx tmp;
16831 bool first = true;
16833 num_regs = bit_count (saved_regs_mask);
16835 /* Must be at least one register to save, and can't save SP or PC. */
16836 gcc_assert (num_regs > 0 && num_regs <= 14);
16837 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
16838 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
16840 /* Create sequence for DWARF info. All the frame-related data for
16841 debugging is held in this wrapper. */
16842 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
16844 /* Describe the stack adjustment. */
16845 tmp = gen_rtx_SET (VOIDmode,
16846 stack_pointer_rtx,
16847 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16848 RTX_FRAME_RELATED_P (tmp) = 1;
16849 XVECEXP (dwarf, 0, 0) = tmp;
16851 /* Find the first register. */
16852 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
16855 i = 0;
16857 /* If there's an odd number of registers to push, start off by
16858 pushing a single register. This ensures that subsequent strd
16859 operations are dword aligned (assuming that SP was originally
16860 64-bit aligned). */
16861 if ((num_regs & 1) != 0)
16863 rtx reg, mem, insn;
16865 reg = gen_rtx_REG (SImode, regno);
16866 if (num_regs == 1)
16867 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
16868 stack_pointer_rtx));
16869 else
16870 mem = gen_frame_mem (Pmode,
16871 gen_rtx_PRE_MODIFY
16872 (Pmode, stack_pointer_rtx,
16873 plus_constant (Pmode, stack_pointer_rtx,
16874 -4 * num_regs)));
16876 tmp = gen_rtx_SET (VOIDmode, mem, reg);
16877 RTX_FRAME_RELATED_P (tmp) = 1;
16878 insn = emit_insn (tmp);
16879 RTX_FRAME_RELATED_P (insn) = 1;
16880 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16881 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
16882 reg);
16883 RTX_FRAME_RELATED_P (tmp) = 1;
16884 i++;
16885 regno++;
16886 XVECEXP (dwarf, 0, i) = tmp;
16887 first = false;
16890 while (i < num_regs)
16891 if (saved_regs_mask & (1 << regno))
16893 rtx reg1, reg2, mem1, mem2;
16894 rtx tmp0, tmp1, tmp2;
16895 int regno2;
16897 /* Find the register to pair with this one. */
16898 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
16899 regno2++)
16902 reg1 = gen_rtx_REG (SImode, regno);
16903 reg2 = gen_rtx_REG (SImode, regno2);
16905 if (first)
16907 rtx insn;
16909 first = false;
16910 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
16911 stack_pointer_rtx,
16912 -4 * num_regs));
16913 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
16914 stack_pointer_rtx,
16915 -4 * (num_regs - 1)));
16916 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16917 plus_constant (Pmode, stack_pointer_rtx,
16918 -4 * (num_regs)));
16919 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
16920 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
16921 RTX_FRAME_RELATED_P (tmp0) = 1;
16922 RTX_FRAME_RELATED_P (tmp1) = 1;
16923 RTX_FRAME_RELATED_P (tmp2) = 1;
16924 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
16925 XVECEXP (par, 0, 0) = tmp0;
16926 XVECEXP (par, 0, 1) = tmp1;
16927 XVECEXP (par, 0, 2) = tmp2;
16928 insn = emit_insn (par);
16929 RTX_FRAME_RELATED_P (insn) = 1;
16930 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16932 else
16934 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
16935 stack_pointer_rtx,
16936 4 * i));
16937 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
16938 stack_pointer_rtx,
16939 4 * (i + 1)));
16940 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
16941 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
16942 RTX_FRAME_RELATED_P (tmp1) = 1;
16943 RTX_FRAME_RELATED_P (tmp2) = 1;
16944 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16945 XVECEXP (par, 0, 0) = tmp1;
16946 XVECEXP (par, 0, 1) = tmp2;
16947 emit_insn (par);
16950 /* Create unwind information. This is an approximation. */
16951 tmp1 = gen_rtx_SET (VOIDmode,
16952 gen_frame_mem (Pmode,
16953 plus_constant (Pmode,
16954 stack_pointer_rtx,
16955 4 * i)),
16956 reg1);
16957 tmp2 = gen_rtx_SET (VOIDmode,
16958 gen_frame_mem (Pmode,
16959 plus_constant (Pmode,
16960 stack_pointer_rtx,
16961 4 * (i + 1))),
16962 reg2);
16964 RTX_FRAME_RELATED_P (tmp1) = 1;
16965 RTX_FRAME_RELATED_P (tmp2) = 1;
16966 XVECEXP (dwarf, 0, i + 1) = tmp1;
16967 XVECEXP (dwarf, 0, i + 2) = tmp2;
16968 i += 2;
16969 regno = regno2 + 1;
16971 else
16972 regno++;
16974 return;
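/* For illustration (an arbitrary three-register mask): pushing
   { r4, r5, r6 } has an odd count, so the code above first emits the
   single writeback store
       str  r4, [sp, #-12]!
   which allocates all 12 bytes and keeps SP 64-bit aligned, followed by
   one offset-addressed pair store
       strd r5, r6, [sp, #4]
   The attached DWARF SEQUENCE describes the same effect as one SP
   decrement plus three individual word stores.  */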
16977 /* STRD in ARM mode requires consecutive registers. This function emits STRD
16978 whenever possible, otherwise it emits single-word stores. The first store
16979 also allocates stack space for all saved registers, using writeback with
16980 post-addressing mode. All other stores use offset addressing. If no STRD
16981 can be emitted, this function emits a sequence of single-word stores,
16982 and not an STM as before, because single-word stores give the scheduler more
16983 freedom and can be turned into an STM by peephole optimizations. */
16984 static void
16985 arm_emit_strd_push (unsigned long saved_regs_mask)
16987 int num_regs = 0;
16988 int i, j, dwarf_index = 0;
16989 int offset = 0;
16990 rtx dwarf = NULL_RTX;
16991 rtx insn = NULL_RTX;
16992 rtx tmp, mem;
16994 /* TODO: More efficient code could be emitted by changing the
16995 layout, e.g., first push all pairs that can use STRD to keep the
16996 stack aligned, and then push all other registers. */
16997 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16998 if (saved_regs_mask & (1 << i))
16999 num_regs++;
17001 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
17002 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
17003 gcc_assert (num_regs > 0);
17005 /* Create sequence for DWARF info. */
17006 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
17008 /* For dwarf info, we generate explicit stack update. */
17009 tmp = gen_rtx_SET (VOIDmode,
17010 stack_pointer_rtx,
17011 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
17012 RTX_FRAME_RELATED_P (tmp) = 1;
17013 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
17015 /* Save registers. */
17016 offset = - 4 * num_regs;
17017 j = 0;
17018 while (j <= LAST_ARM_REGNUM)
17019 if (saved_regs_mask & (1 << j))
17021 if ((j % 2 == 0)
17022 && (saved_regs_mask & (1 << (j + 1))))
17024 /* The current register and the next register form a register pair
17025 for which STRD can be generated. */
17026 if (offset < 0)
17028 /* Allocate stack space for all saved registers. */
17029 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
17030 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
17031 mem = gen_frame_mem (DImode, tmp);
17032 offset = 0;
17034 else if (offset > 0)
17035 mem = gen_frame_mem (DImode,
17036 plus_constant (Pmode,
17037 stack_pointer_rtx,
17038 offset));
17039 else
17040 mem = gen_frame_mem (DImode, stack_pointer_rtx);
17042 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
17043 RTX_FRAME_RELATED_P (tmp) = 1;
17044 tmp = emit_insn (tmp);
17046 /* Record the first store insn. */
17047 if (dwarf_index == 1)
17048 insn = tmp;
17050 /* Generate dwarf info. */
17051 mem = gen_frame_mem (SImode,
17052 plus_constant (Pmode,
17053 stack_pointer_rtx,
17054 offset));
17055 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
17056 RTX_FRAME_RELATED_P (tmp) = 1;
17057 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
17059 mem = gen_frame_mem (SImode,
17060 plus_constant (Pmode,
17061 stack_pointer_rtx,
17062 offset + 4));
17063 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
17064 RTX_FRAME_RELATED_P (tmp) = 1;
17065 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
17067 offset += 8;
17068 j += 2;
17070 else
17072 /* Emit a single word store. */
17073 if (offset < 0)
17075 /* Allocate stack space for all saved registers. */
17076 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
17077 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
17078 mem = gen_frame_mem (SImode, tmp);
17079 offset = 0;
17081 else if (offset > 0)
17082 mem = gen_frame_mem (SImode,
17083 plus_constant (Pmode,
17084 stack_pointer_rtx,
17085 offset));
17086 else
17087 mem = gen_frame_mem (SImode, stack_pointer_rtx);
17089 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
17090 RTX_FRAME_RELATED_P (tmp) = 1;
17091 tmp = emit_insn (tmp);
17093 /* Record the first store insn. */
17094 if (dwarf_index == 1)
17095 insn = tmp;
17097 /* Generate dwarf info. */
17098 mem = gen_frame_mem (SImode,
17099 plus_constant(Pmode,
17100 stack_pointer_rtx,
17101 offset));
17102 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
17103 RTX_FRAME_RELATED_P (tmp) = 1;
17104 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
17106 offset += 4;
17107 j += 1;
17110 else
17111 j++;
17113 /* Attach dwarf info to the first insn we generate. */
17114 gcc_assert (insn != NULL_RTX);
17115 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17116 RTX_FRAME_RELATED_P (insn) = 1;
17119 /* Generate and emit an insn that we will recognize as a push_multi.
17120 Unfortunately, since this insn does not reflect very well the actual
17121 semantics of the operation, we need to annotate the insn for the benefit
17122 of DWARF2 frame unwind information. */
17123 static rtx
17124 emit_multi_reg_push (unsigned long mask)
17126 int num_regs = 0;
17127 int num_dwarf_regs;
17128 int i, j;
17129 rtx par;
17130 rtx dwarf;
17131 int dwarf_par_index;
17132 rtx tmp, reg;
17134 for (i = 0; i <= LAST_ARM_REGNUM; i++)
17135 if (mask & (1 << i))
17136 num_regs++;
17138 gcc_assert (num_regs && num_regs <= 16);
17140 /* We don't record the PC in the dwarf frame information. */
17141 num_dwarf_regs = num_regs;
17142 if (mask & (1 << PC_REGNUM))
17143 num_dwarf_regs--;
17145 /* For the body of the insn we are going to generate an UNSPEC in
17146 parallel with several USEs. This allows the insn to be recognized
17147 by the push_multi pattern in the arm.md file.
17149 The body of the insn looks something like this:
17151 (parallel [
17152 (set (mem:BLK (pre_modify:SI (reg:SI sp)
17153 (const_int:SI <num>)))
17154 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
17155 (use (reg:SI XX))
17156 (use (reg:SI YY))
17160 For the frame note however, we try to be more explicit and actually
17161 show each register being stored into the stack frame, plus a (single)
17162 decrement of the stack pointer. We do it this way in order to be
17163 friendly to the stack unwinding code, which only wants to see a single
17164 stack decrement per instruction. The RTL we generate for the note looks
17165 something like this:
17167 (sequence [
17168 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
17169 (set (mem:SI (reg:SI sp)) (reg:SI r4))
17170 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
17171 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
17175 FIXME: In an ideal world the PRE_MODIFY would not exist and
17176 instead we'd have a parallel expression detailing all
17177 the stores to the various memory addresses so that debug
17178 information is more up-to-date. Remember however while writing
17179 this to take care of the constraints with the push instruction.
17181 Note also that this has to be taken care of for the VFP registers.
17183 For more see PR43399. */
17185 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
17186 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
17187 dwarf_par_index = 1;
17189 for (i = 0; i <= LAST_ARM_REGNUM; i++)
17191 if (mask & (1 << i))
17193 reg = gen_rtx_REG (SImode, i);
17195 XVECEXP (par, 0, 0)
17196 = gen_rtx_SET (VOIDmode,
17197 gen_frame_mem
17198 (BLKmode,
17199 gen_rtx_PRE_MODIFY (Pmode,
17200 stack_pointer_rtx,
17201 plus_constant
17202 (Pmode, stack_pointer_rtx,
17203 -4 * num_regs))
17205 gen_rtx_UNSPEC (BLKmode,
17206 gen_rtvec (1, reg),
17207 UNSPEC_PUSH_MULT));
17209 if (i != PC_REGNUM)
17211 tmp = gen_rtx_SET (VOIDmode,
17212 gen_frame_mem (SImode, stack_pointer_rtx),
17213 reg);
17214 RTX_FRAME_RELATED_P (tmp) = 1;
17215 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
17216 dwarf_par_index++;
17219 break;
17223 for (j = 1, i++; j < num_regs; i++)
17225 if (mask & (1 << i))
17227 reg = gen_rtx_REG (SImode, i);
17229 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
17231 if (i != PC_REGNUM)
17234 = gen_rtx_SET (VOIDmode,
17235 gen_frame_mem
17236 (SImode,
17237 plus_constant (Pmode, stack_pointer_rtx,
17238 4 * j)),
17239 reg);
17240 RTX_FRAME_RELATED_P (tmp) = 1;
17241 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
17244 j++;
17248 par = emit_insn (par);
17250 tmp = gen_rtx_SET (VOIDmode,
17251 stack_pointer_rtx,
17252 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
17253 RTX_FRAME_RELATED_P (tmp) = 1;
17254 XVECEXP (dwarf, 0, 0) = tmp;
17256 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17258 return par;
17261 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
17262 SIZE is the offset to be adjusted.
17263 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
17264 static void
17265 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
17267 rtx dwarf;
17269 RTX_FRAME_RELATED_P (insn) = 1;
17270 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
17271 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
17274 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
17275 SAVED_REGS_MASK shows which registers need to be restored.
17277 Unfortunately, since this insn does not reflect very well the actual
17278 semantics of the operation, we need to annotate the insn for the benefit
17279 of DWARF2 frame unwind information. */
17280 static void
17281 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
17283 int num_regs = 0;
17284 int i, j;
17285 rtx par;
17286 rtx dwarf = NULL_RTX;
17287 rtx tmp, reg;
17288 bool return_in_pc;
17289 int offset_adj;
17290 int emit_update;
17292 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
17293 offset_adj = return_in_pc ? 1 : 0;
17294 for (i = 0; i <= LAST_ARM_REGNUM; i++)
17295 if (saved_regs_mask & (1 << i))
17296 num_regs++;
17298 gcc_assert (num_regs && num_regs <= 16);
17300 /* If SP is in reglist, then we don't emit SP update insn. */
17301 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
17303 /* The parallel needs to hold num_regs SETs
17304 and one SET for the stack update. */
17305 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
17307 if (return_in_pc)
17309 tmp = ret_rtx;
17310 XVECEXP (par, 0, 0) = tmp;
17313 if (emit_update)
17315 /* Increment the stack pointer, based on there being
17316 num_regs 4-byte registers to restore. */
17317 tmp = gen_rtx_SET (VOIDmode,
17318 stack_pointer_rtx,
17319 plus_constant (Pmode,
17320 stack_pointer_rtx,
17321 4 * num_regs));
17322 RTX_FRAME_RELATED_P (tmp) = 1;
17323 XVECEXP (par, 0, offset_adj) = tmp;
17326 /* Now restore every reg, which may include PC. */
17327 for (j = 0, i = 0; j < num_regs; i++)
17328 if (saved_regs_mask & (1 << i))
17330 reg = gen_rtx_REG (SImode, i);
17331 if ((num_regs == 1) && emit_update && !return_in_pc)
17333 /* Emit single load with writeback. */
17334 tmp = gen_frame_mem (SImode,
17335 gen_rtx_POST_INC (Pmode,
17336 stack_pointer_rtx));
17337 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
17338 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17339 return;
17342 tmp = gen_rtx_SET (VOIDmode,
17343 reg,
17344 gen_frame_mem
17345 (SImode,
17346 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
17347 RTX_FRAME_RELATED_P (tmp) = 1;
17348 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
17350 /* We need to maintain a sequence for DWARF info too. As dwarf info
17351 should not have PC, skip PC. */
17352 if (i != PC_REGNUM)
17353 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17355 j++;
17358 if (return_in_pc)
17359 par = emit_jump_insn (par);
17360 else
17361 par = emit_insn (par);
17363 REG_NOTES (par) = dwarf;
17364 if (!return_in_pc)
17365 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
17366 stack_pointer_rtx, stack_pointer_rtx);
17369 /* Generate and emit an insn pattern that we will recognize as a pop_multi
17370 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
17372 Unfortunately, since this insn does not reflect very well the actual
17373 semantics of the operation, we need to annotate the insn for the benefit
17374 of DWARF2 frame unwind information. */
17375 static void
17376 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
17378 int i, j;
17379 rtx par;
17380 rtx dwarf = NULL_RTX;
17381 rtx tmp, reg;
17383 gcc_assert (num_regs && num_regs <= 32);
17385 /* Workaround ARM10 VFPr1 bug. */
17386 if (num_regs == 2 && !arm_arch6)
17388 if (first_reg == 15)
17389 first_reg--;
17391 num_regs++;
17394 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
17395 there could be up to 32 D-registers to restore.
17396 If there are more than 16 D-registers, make two recursive calls,
17397 each of which emits one pop_multi instruction. */
17398 if (num_regs > 16)
17400 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
17401 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
17402 return;
17405 /* The parallel needs to hold num_regs SETs
17406 and one SET for the stack update. */
17407 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
17409 /* Increment the stack pointer, based on there being
17410 num_regs 8-byte registers to restore. */
17411 tmp = gen_rtx_SET (VOIDmode,
17412 base_reg,
17413 plus_constant (Pmode, base_reg, 8 * num_regs));
17414 RTX_FRAME_RELATED_P (tmp) = 1;
17415 XVECEXP (par, 0, 0) = tmp;
17417 /* Now show every reg that will be restored, using a SET for each. */
17418 for (j = 0, i=first_reg; j < num_regs; i += 2)
17420 reg = gen_rtx_REG (DFmode, i);
17422 tmp = gen_rtx_SET (VOIDmode,
17423 reg,
17424 gen_frame_mem
17425 (DFmode,
17426 plus_constant (Pmode, base_reg, 8 * j)));
17427 RTX_FRAME_RELATED_P (tmp) = 1;
17428 XVECEXP (par, 0, j + 1) = tmp;
17430 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17432 j++;
17435 par = emit_insn (par);
17436 REG_NOTES (par) = dwarf;
17438 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
17439 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
17441 RTX_FRAME_RELATED_P (par) = 1;
17442 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
17444 else
17445 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
17446 base_reg, base_reg);
17449 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
17450 even number of registers is being popped, multiple LDRD patterns are created
17451 for all register pairs. If an odd number of registers is popped, the last
17452 register is loaded using an LDR pattern. */
17453 static void
17454 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
17456 int num_regs = 0;
17457 int i, j;
17458 rtx par = NULL_RTX;
17459 rtx dwarf = NULL_RTX;
17460 rtx tmp, reg, tmp1;
17461 bool return_in_pc;
17463 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
17464 for (i = 0; i <= LAST_ARM_REGNUM; i++)
17465 if (saved_regs_mask & (1 << i))
17466 num_regs++;
17468 gcc_assert (num_regs && num_regs <= 16);
17470 /* We cannot generate LDRD for PC, so reduce the count if PC is to be
17471 popped. Then, if the original num_regs was even it is now odd and we
17472 can generate a pop-multiple that includes PC; if it was odd it is now
17473 even and PC can be loaded with an LDR-with-return. */
17474 if (return_in_pc)
17475 num_regs--;
17477 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
17479 /* Var J iterates over all the registers to find those that are set in
17480 saved_regs_mask. Var I gives the index of a saved register within the
17481 stack frame. A PARALLEL RTX holding a register pair is created here so
17482 that the LDRD pattern can be matched. As PC is always the last register
17483 to be popped, and we have already decremented num_regs if PC is present,
17484 we do not have to worry about PC in this loop. */
17485 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
17486 if (saved_regs_mask & (1 << j))
17488 /* Create RTX for memory load. */
17489 reg = gen_rtx_REG (SImode, j);
17490 tmp = gen_rtx_SET (SImode,
17491 reg,
17492 gen_frame_mem (SImode,
17493 plus_constant (Pmode,
17494 stack_pointer_rtx, 4 * i)));
17495 RTX_FRAME_RELATED_P (tmp) = 1;
17497 if (i % 2 == 0)
17499 /* When saved-register index (i) is even, the RTX to be emitted is
17500 yet to be created. Hence create it first. The LDRD pattern we
17501 are generating is :
17502 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
17503 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
17504 where target registers need not be consecutive. */
17505 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
17506 dwarf = NULL_RTX;
17509 /* The i'th register is added to the PARALLEL RTX. If i is even, reg_i is
17510 added as the 0th element, and if i is odd, reg_i is added as the 1st
17511 element of the LDRD pattern shown above. */
17512 XVECEXP (par, 0, (i % 2)) = tmp;
17513 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17515 if ((i % 2) == 1)
17517 /* When saved-register index (i) is odd, RTXs for both the registers
17518 to be loaded are generated in above given LDRD pattern, and the
17519 pattern can be emitted now. */
17520 par = emit_insn (par);
17521 REG_NOTES (par) = dwarf;
17522 RTX_FRAME_RELATED_P (par) = 1;
17525 i++;
17528 /* If the number of registers pushed is odd and return_in_pc is false, or
17529 the number of registers is even and return_in_pc is true, the last
17530 register is popped using LDR. It can be PC as well. Hence, adjust the
17531 stack first and then use LDR with post-increment. */
17533 /* Increment the stack pointer to account for the registers
17534 (4 bytes each) restored by the LDRD pairs above. */
17535 tmp = gen_rtx_SET (VOIDmode,
17536 stack_pointer_rtx,
17537 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
17538 RTX_FRAME_RELATED_P (tmp) = 1;
17539 tmp = emit_insn (tmp);
17540 if (!return_in_pc)
17542 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
17543 stack_pointer_rtx, stack_pointer_rtx);
17546 dwarf = NULL_RTX;
17548 if (((num_regs % 2) == 1 && !return_in_pc)
17549 || ((num_regs % 2) == 0 && return_in_pc))
17551 /* Scan for the single register to be popped. Skip until the saved
17552 register is found. */
17553 for (; (saved_regs_mask & (1 << j)) == 0; j++);
17555 /* Gen LDR with post increment here. */
17556 tmp1 = gen_rtx_MEM (SImode,
17557 gen_rtx_POST_INC (SImode,
17558 stack_pointer_rtx));
17559 set_mem_alias_set (tmp1, get_frame_alias_set ());
17561 reg = gen_rtx_REG (SImode, j);
17562 tmp = gen_rtx_SET (SImode, reg, tmp1);
17563 RTX_FRAME_RELATED_P (tmp) = 1;
17564 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
17566 if (return_in_pc)
17568 /* If return_in_pc, j must be PC_REGNUM. */
17569 gcc_assert (j == PC_REGNUM);
17570 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
17571 XVECEXP (par, 0, 0) = ret_rtx;
17572 XVECEXP (par, 0, 1) = tmp;
17573 par = emit_jump_insn (par);
17575 else
17577 par = emit_insn (tmp);
17578 REG_NOTES (par) = dwarf;
17579 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
17580 stack_pointer_rtx, stack_pointer_rtx);
17584 else if ((num_regs % 2) == 1 && return_in_pc)
17586 /* There are 2 registers left to be popped, one of which is PC. Generate
17587 the pattern pop_multiple_with_stack_update_and_return to pop into PC. */
17588 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
17591 return;
17594 /* LDRD in ARM mode needs consecutive registers as operands. This function
17595 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
17596 offset addressing and then generates one separate stack update. This provides
17597 more scheduling freedom, compared to writeback on every load. However,
17598 if the function returns using load into PC directly
17599 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
17600 before the last load. TODO: Add a peephole optimization to recognize
17601 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
17602 peephole optimization to merge the load at stack-offset zero
17603 with the stack update instruction using load with writeback
17604 in post-index addressing mode. */
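/* A sketch of the kind of sequence this produces, assuming the saved
   registers are r4-r7 and lr (the register choice is illustrative only):

        ldrd    r4, r5, [sp]
        ldrd    r6, r7, [sp, #8]
        ldr     lr, [sp, #16]
        add     sp, sp, #20

   The single stack update at the end is what gives the scheduler the
   freedom to reorder the loads.  */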
17605 static void
17606 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
17608 int j = 0;
17609 int offset = 0;
17610 rtx par = NULL_RTX;
17611 rtx dwarf = NULL_RTX;
17612 rtx tmp, mem;
17614 /* Restore saved registers. */
17615 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
17616 j = 0;
17617 while (j <= LAST_ARM_REGNUM)
17618 if (saved_regs_mask & (1 << j))
17620 if ((j % 2) == 0
17621 && (saved_regs_mask & (1 << (j + 1)))
17622 && (j + 1) != PC_REGNUM)
17624 /* Current register and next register form register pair for which
17625 LDRD can be generated. PC is always the last register popped, and
17626 we handle it separately. */
17627 if (offset > 0)
17628 mem = gen_frame_mem (DImode,
17629 plus_constant (Pmode,
17630 stack_pointer_rtx,
17631 offset));
17632 else
17633 mem = gen_frame_mem (DImode, stack_pointer_rtx);
17635 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
17636 tmp = emit_insn (tmp);
17637 RTX_FRAME_RELATED_P (tmp) = 1;
17639 /* Generate dwarf info. */
17641 dwarf = alloc_reg_note (REG_CFA_RESTORE,
17642 gen_rtx_REG (SImode, j),
17643 NULL_RTX);
17644 dwarf = alloc_reg_note (REG_CFA_RESTORE,
17645 gen_rtx_REG (SImode, j + 1),
17646 dwarf);
17648 REG_NOTES (tmp) = dwarf;
17650 offset += 8;
17651 j += 2;
17653 else if (j != PC_REGNUM)
17655 /* Emit a single word load. */
17656 if (offset > 0)
17657 mem = gen_frame_mem (SImode,
17658 plus_constant (Pmode,
17659 stack_pointer_rtx,
17660 offset));
17661 else
17662 mem = gen_frame_mem (SImode, stack_pointer_rtx);
17664 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
17665 tmp = emit_insn (tmp);
17666 RTX_FRAME_RELATED_P (tmp) = 1;
17668 /* Generate dwarf info. */
17669 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
17670 gen_rtx_REG (SImode, j),
17671 NULL_RTX);
17673 offset += 4;
17674 j += 1;
17676 else /* j == PC_REGNUM */
17677 j++;
17679 else
17680 j++;
17682 /* Update the stack. */
17683 if (offset > 0)
17685 tmp = gen_rtx_SET (Pmode,
17686 stack_pointer_rtx,
17687 plus_constant (Pmode,
17688 stack_pointer_rtx,
17689 offset));
17690 tmp = emit_insn (tmp);
17691 arm_add_cfa_adjust_cfa_note (tmp, offset,
17692 stack_pointer_rtx, stack_pointer_rtx);
17693 offset = 0;
17696 if (saved_regs_mask & (1 << PC_REGNUM))
17698 /* Only PC is to be popped. */
17699 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
17700 XVECEXP (par, 0, 0) = ret_rtx;
17701 tmp = gen_rtx_SET (SImode,
17702 gen_rtx_REG (SImode, PC_REGNUM),
17703 gen_frame_mem (SImode,
17704 gen_rtx_POST_INC (SImode,
17705 stack_pointer_rtx)));
17706 RTX_FRAME_RELATED_P (tmp) = 1;
17707 XVECEXP (par, 0, 1) = tmp;
17708 par = emit_jump_insn (par);
17710 /* Generate dwarf info. */
17711 dwarf = alloc_reg_note (REG_CFA_RESTORE,
17712 gen_rtx_REG (SImode, PC_REGNUM),
17713 NULL_RTX);
17714 REG_NOTES (par) = dwarf;
17715 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
17716 stack_pointer_rtx, stack_pointer_rtx);
17720 /* Calculate the size of the return value that is passed in registers. */
17721 static unsigned
17722 arm_size_return_regs (void)
17724 enum machine_mode mode;
17726 if (crtl->return_rtx != 0)
17727 mode = GET_MODE (crtl->return_rtx);
17728 else
17729 mode = DECL_MODE (DECL_RESULT (current_function_decl));
17731 return GET_MODE_SIZE (mode);
17734 /* Return true if the current function needs to save/restore LR. */
17735 static bool
17736 thumb_force_lr_save (void)
17738 return !cfun->machine->lr_save_eliminated
17739 && (!leaf_function_p ()
17740 || thumb_far_jump_used_p ()
17741 || df_regs_ever_live_p (LR_REGNUM));
17744 /* Return true if CALL is an indirect tail call. In that case we do not
17745 know whether r3 will be available, because r3 may be needed to hold the
17746 address of the call target. */
17747 static bool
17748 is_indirect_tailcall_p (rtx call)
17750 rtx pat = PATTERN (call);
17752 /* Indirect tail call. */
17753 pat = XVECEXP (pat, 0, 0);
17754 if (GET_CODE (pat) == SET)
17755 pat = SET_SRC (pat);
17757 pat = XEXP (XEXP (pat, 0), 0);
17758 return REG_P (pat);
17761 /* Return true if r3 is used by any of the tail call insns in the
17762 current function. */
17763 static bool
17764 any_sibcall_could_use_r3 (void)
17766 edge_iterator ei;
17767 edge e;
17769 if (!crtl->tail_call_emit)
17770 return false;
17771 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17772 if (e->flags & EDGE_SIBCALL)
17774 rtx call = BB_END (e->src);
17775 if (!CALL_P (call))
17776 call = prev_nonnote_nondebug_insn (call);
17777 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
17778 if (find_regno_fusage (call, USE, 3)
17779 || is_indirect_tailcall_p (call))
17780 return true;
17782 return false;
17786 /* Compute the distance from register FROM to register TO.
17787 These can be the arg pointer (26), the soft frame pointer (25),
17788 the stack pointer (13) or the hard frame pointer (11).
17789 In thumb mode r7 is used as the soft frame pointer, if needed.
17790 Typical stack layout looks like this:
17792 old stack pointer -> | |
17793 ----
17794 | | \
17795 | | saved arguments for
17796 | | vararg functions
17797 | | /
17799 hard FP & arg pointer -> | | \
17800 | | stack
17801 | | frame
17802 | | /
17804 | | \
17805 | | call saved
17806 | | registers
17807 soft frame pointer -> | | /
17809 | | \
17810 | | local
17811 | | variables
17812 locals base pointer -> | | /
17814 | | \
17815 | | outgoing
17816 | | arguments
17817 current stack pointer -> | | /
17820 For a given function some or all of these stack components
17821 may not be needed, giving rise to the possibility of
17822 eliminating some of the registers.
17824 The values returned by this function must reflect the behavior
17825 of arm_expand_prologue() and arm_compute_save_reg_mask().
17827 The sign of the number returned reflects the direction of stack
17828 growth, so the values are positive for all eliminations except
17829 from the soft frame pointer to the hard frame pointer.
17831 SFP may point just inside the local variables block to ensure correct
17832 alignment. */
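/* A purely illustrative example of how the offsets computed below feed the
   eliminations (assuming CALLER_INTERWORKING_SLOT_SIZE is zero and no
   alignment padding or extra alignment register is needed): with no pretend
   args, 16 bytes of saved core registers, 8 bytes of locals and no outgoing
   arguments, saved_args = 0, saved_regs = soft_frame = 16 and
   locals_base = outgoing_args = 24, so eliminating FRAME_POINTER into
   STACK_POINTER yields 24 - 16 = 8, and eliminating ARG_POINTER into
   STACK_POINTER yields 24 - (0 + 4) = 20.  */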
17835 /* Calculate stack offsets. These are used to calculate register elimination
17836 offsets and in prologue/epilogue code. Also calculates which registers
17837 should be saved. */
17839 static arm_stack_offsets *
17840 arm_get_frame_offsets (void)
17842 struct arm_stack_offsets *offsets;
17843 unsigned long func_type;
17844 int leaf;
17845 int saved;
17846 int core_saved;
17847 HOST_WIDE_INT frame_size;
17848 int i;
17850 offsets = &cfun->machine->stack_offsets;
17852 /* We need to know if we are a leaf function. Unfortunately, it
17853 is possible to be called after start_sequence has been called,
17854 which causes get_insns to return the insns for the sequence,
17855 not the function, which will cause leaf_function_p to return
17856 the incorrect result. We only need
17858 to know about leaf functions once reload has completed, and the
17859 frame size cannot be changed after that time, so we can safely
17860 use the cached value. */
17862 if (reload_completed)
17863 return offsets;
17865 /* Initially this is the size of the local variables. It will be translated
17866 into an offset once we have determined the size of preceding data. */
17867 frame_size = ROUND_UP_WORD (get_frame_size ());
17869 leaf = leaf_function_p ();
17871 /* Space for variadic functions. */
17872 offsets->saved_args = crtl->args.pretend_args_size;
17874 /* In Thumb mode this is incorrect, but never used. */
17875 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
17876 arm_compute_static_chain_stack_bytes();
17878 if (TARGET_32BIT)
17880 unsigned int regno;
17882 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
17883 core_saved = bit_count (offsets->saved_regs_mask) * 4;
17884 saved = core_saved;
17886 /* We know that SP will be doubleword aligned on entry, and we must
17887 preserve that condition at any subroutine call. We also require the
17888 soft frame pointer to be doubleword aligned. */
17890 if (TARGET_REALLY_IWMMXT)
17892 /* Check for the call-saved iWMMXt registers. */
17893 for (regno = FIRST_IWMMXT_REGNUM;
17894 regno <= LAST_IWMMXT_REGNUM;
17895 regno++)
17896 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
17897 saved += 8;
17900 func_type = arm_current_func_type ();
17901 /* Space for saved VFP registers. */
17902 if (! IS_VOLATILE (func_type)
17903 && TARGET_HARD_FLOAT && TARGET_VFP)
17904 saved += arm_get_vfp_saved_size ();
17906 else /* TARGET_THUMB1 */
17908 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
17909 core_saved = bit_count (offsets->saved_regs_mask) * 4;
17910 saved = core_saved;
17911 if (TARGET_BACKTRACE)
17912 saved += 16;
17915 /* Saved registers include the stack frame. */
17916 offsets->saved_regs = offsets->saved_args + saved +
17917 arm_compute_static_chain_stack_bytes();
17918 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
17919 /* A leaf function does not need any stack alignment if it has nothing
17920 on the stack. */
17921 if (leaf && frame_size == 0
17922 /* However if it calls alloca(), we have a dynamically allocated
17923 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
17924 && ! cfun->calls_alloca)
17926 offsets->outgoing_args = offsets->soft_frame;
17927 offsets->locals_base = offsets->soft_frame;
17928 return offsets;
17931 /* Ensure SFP has the correct alignment. */
17932 if (ARM_DOUBLEWORD_ALIGN
17933 && (offsets->soft_frame & 7))
17935 offsets->soft_frame += 4;
17936 /* Try to align stack by pushing an extra reg. Don't bother doing this
17937 when there is a stack frame as the alignment will be rolled into
17938 the normal stack adjustment. */
17939 if (frame_size + crtl->outgoing_args_size == 0)
17941 int reg = -1;
17943 /* If it is safe to use r3, then do so. This sometimes
17944 generates better code on Thumb-2 by avoiding the need to
17945 use 32-bit push/pop instructions. */
17946 if (! any_sibcall_could_use_r3 ()
17947 && arm_size_return_regs () <= 12
17948 && (offsets->saved_regs_mask & (1 << 3)) == 0
17949 && (TARGET_THUMB2
17950 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
17952 reg = 3;
17954 else
17955 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
17957 /* Avoid fixed registers; they may be changed at
17958 arbitrary times so it's unsafe to restore them
17959 during the epilogue. */
17960 if (!fixed_regs[i]
17961 && (offsets->saved_regs_mask & (1 << i)) == 0)
17963 reg = i;
17964 break;
17968 if (reg != -1)
17970 offsets->saved_regs += 4;
17971 offsets->saved_regs_mask |= (1 << reg);
17976 offsets->locals_base = offsets->soft_frame + frame_size;
17977 offsets->outgoing_args = (offsets->locals_base
17978 + crtl->outgoing_args_size);
17980 if (ARM_DOUBLEWORD_ALIGN)
17982 /* Ensure SP remains doubleword aligned. */
17983 if (offsets->outgoing_args & 7)
17984 offsets->outgoing_args += 4;
17985 gcc_assert (!(offsets->outgoing_args & 7));
17988 return offsets;
17992 /* Calculate the relative offsets for the different stack pointers. Positive
17993 offsets are in the direction of stack growth. */
17995 HOST_WIDE_INT
17996 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
17998 arm_stack_offsets *offsets;
18000 offsets = arm_get_frame_offsets ();
18002 /* OK, now we have enough information to compute the distances.
18003 There must be an entry in these switch tables for each pair
18004 of registers in ELIMINABLE_REGS, even if some of the entries
18005 seem to be redundant or useless. */
18006 switch (from)
18008 case ARG_POINTER_REGNUM:
18009 switch (to)
18011 case THUMB_HARD_FRAME_POINTER_REGNUM:
18012 return 0;
18014 case FRAME_POINTER_REGNUM:
18015 /* This is the reverse of the soft frame pointer
18016 to hard frame pointer elimination below. */
18017 return offsets->soft_frame - offsets->saved_args;
18019 case ARM_HARD_FRAME_POINTER_REGNUM:
18020 /* This is only non-zero in the case where the static chain register
18021 is stored above the frame. */
18022 return offsets->frame - offsets->saved_args - 4;
18024 case STACK_POINTER_REGNUM:
18025 /* If nothing has been pushed on the stack at all
18026 then this will return -4. This *is* correct! */
18027 return offsets->outgoing_args - (offsets->saved_args + 4);
18029 default:
18030 gcc_unreachable ();
18032 gcc_unreachable ();
18034 case FRAME_POINTER_REGNUM:
18035 switch (to)
18037 case THUMB_HARD_FRAME_POINTER_REGNUM:
18038 return 0;
18040 case ARM_HARD_FRAME_POINTER_REGNUM:
18041 /* The hard frame pointer points to the top entry in the
18042 stack frame. The soft frame pointer to the bottom entry
18043 in the stack frame. If there is no stack frame at all,
18044 then they are identical. */
18046 return offsets->frame - offsets->soft_frame;
18048 case STACK_POINTER_REGNUM:
18049 return offsets->outgoing_args - offsets->soft_frame;
18051 default:
18052 gcc_unreachable ();
18054 gcc_unreachable ();
18056 default:
18057 /* You cannot eliminate from the stack pointer.
18058 In theory you could eliminate from the hard frame
18059 pointer to the stack pointer, but this will never
18060 happen, since if a stack frame is not needed the
18061 hard frame pointer will never be used. */
18062 gcc_unreachable ();
18066 /* Given FROM and TO register numbers, say whether this elimination is
18067 allowed. Frame pointer elimination is automatically handled.
18069 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
18070 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
18071 pointer, we must eliminate FRAME_POINTER_REGNUM into
18072 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
18073 ARG_POINTER_REGNUM. */
18075 bool
18076 arm_can_eliminate (const int from, const int to)
18078 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
18079 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
18080 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
18081 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
18082 true);
18085 /* Emit RTL to save coprocessor registers on function entry. Returns the
18086 number of bytes pushed. */
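/* For instance (an illustrative sketch only): each live call-saved iWMMXt
   register is pushed individually with a pre-decrement store, adding 8 bytes
   apiece, while a contiguous run of live call-saved VFP D-registers such as
   d8-d11 is pushed with a single VFP store-multiple (e.g. vpush {d8-d11})
   via vfp_emit_fstmd, accumulating 8 bytes per D-register.  */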
18088 static int
18089 arm_save_coproc_regs(void)
18091 int saved_size = 0;
18092 unsigned reg;
18093 unsigned start_reg;
18094 rtx insn;
18096 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
18097 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18099 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
18100 insn = gen_rtx_MEM (V2SImode, insn);
18101 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
18102 RTX_FRAME_RELATED_P (insn) = 1;
18103 saved_size += 8;
18106 if (TARGET_HARD_FLOAT && TARGET_VFP)
18108 start_reg = FIRST_VFP_REGNUM;
18110 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
18112 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
18113 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
18115 if (start_reg != reg)
18116 saved_size += vfp_emit_fstmd (start_reg,
18117 (reg - start_reg) / 2);
18118 start_reg = reg + 2;
18121 if (start_reg != reg)
18122 saved_size += vfp_emit_fstmd (start_reg,
18123 (reg - start_reg) / 2);
18125 return saved_size;
18129 /* Set the Thumb frame pointer from the stack pointer. */
18131 static void
18132 thumb_set_frame_pointer (arm_stack_offsets *offsets)
18134 HOST_WIDE_INT amount;
18135 rtx insn, dwarf;
18137 amount = offsets->outgoing_args - offsets->locals_base;
18138 if (amount < 1024)
18139 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18140 stack_pointer_rtx, GEN_INT (amount)));
18141 else
18143 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
18144 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
18145 expects the first two operands to be the same. */
18146 if (TARGET_THUMB2)
18148 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18149 stack_pointer_rtx,
18150 hard_frame_pointer_rtx));
18152 else
18154 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18155 hard_frame_pointer_rtx,
18156 stack_pointer_rtx));
18158 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
18159 plus_constant (Pmode, stack_pointer_rtx, amount));
18160 RTX_FRAME_RELATED_P (dwarf) = 1;
18161 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18164 RTX_FRAME_RELATED_P (insn) = 1;
18167 /* Generate the prologue instructions for entry into an ARM or Thumb-2
18168 function. */
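/* For a simple APCS-frame function in ARM state the code below boils down to
   the classic sequence (illustrative; the exact register list depends on
   live_regs_mask, and the final adjustment only appears when locals or
   outgoing arguments are present):

        mov     ip, sp
        stmfd   sp!, {fp, ip, lr, pc}
        sub     fp, ip, #4
        sub     sp, sp, #<locals + outgoing args>
   */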
18169 void
18170 arm_expand_prologue (void)
18172 rtx amount;
18173 rtx insn;
18174 rtx ip_rtx;
18175 unsigned long live_regs_mask;
18176 unsigned long func_type;
18177 int fp_offset = 0;
18178 int saved_pretend_args = 0;
18179 int saved_regs = 0;
18180 unsigned HOST_WIDE_INT args_to_push;
18181 arm_stack_offsets *offsets;
18183 func_type = arm_current_func_type ();
18185 /* Naked functions don't have prologues. */
18186 if (IS_NAKED (func_type))
18187 return;
18189 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
18190 args_to_push = crtl->args.pretend_args_size;
18192 /* Compute which register we will have to save onto the stack. */
18193 offsets = arm_get_frame_offsets ();
18194 live_regs_mask = offsets->saved_regs_mask;
18196 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
18198 if (IS_STACKALIGN (func_type))
18200 rtx r0, r1;
18202 /* Handle a word-aligned stack pointer. We generate the following:
18204 mov r0, sp
18205 bic r1, r0, #7
18206 mov sp, r1
18207 <save and restore r0 in normal prologue/epilogue>
18208 mov sp, r0
18209 bx lr
18211 The unwinder doesn't need to know about the stack realignment.
18212 Just tell it we saved SP in r0. */
18213 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
18215 r0 = gen_rtx_REG (SImode, 0);
18216 r1 = gen_rtx_REG (SImode, 1);
18218 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
18219 RTX_FRAME_RELATED_P (insn) = 1;
18220 add_reg_note (insn, REG_CFA_REGISTER, NULL);
18222 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
18224 /* ??? The CFA changes here, which may cause GDB to conclude that it
18225 has entered a different function. That said, the unwind info is
18226 correct, individually, before and after this instruction because
18227 we've described the save of SP, which will override the default
18228 handling of SP as restoring from the CFA. */
18229 emit_insn (gen_movsi (stack_pointer_rtx, r1));
18232 /* For APCS frames, if the IP register is clobbered when
18233 creating the frame, save that register in a special
18234 way. */
18235 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18237 if (IS_INTERRUPT (func_type))
18239 /* Interrupt functions must not corrupt any registers.
18240 Creating a frame pointer however, corrupts the IP
18241 register, so we must push it first. */
18242 emit_multi_reg_push (1 << IP_REGNUM);
18244 /* Do not set RTX_FRAME_RELATED_P on this insn.
18245 The dwarf stack unwinding code only wants to see one
18246 stack decrement per function, and this is not it. If
18247 this instruction is labeled as being part of the frame
18248 creation sequence then dwarf2out_frame_debug_expr will
18249 die when it encounters the assignment of IP to FP
18250 later on, since the use of SP here establishes SP as
18251 the CFA register and not IP.
18253 Anyway this instruction is not really part of the stack
18254 frame creation although it is part of the prologue. */
18256 else if (IS_NESTED (func_type))
18258 /* The Static chain register is the same as the IP register
18259 used as a scratch register during stack frame creation.
18260 To get around this we need to find somewhere to store IP
18261 whilst the frame is being created. We try the following
18262 places in order:
18264 1. The last argument register.
18265 2. A slot on the stack above the frame. (This only
18266 works if the function is not a varargs function).
18267 3. Register r3, after pushing the argument registers
18268 onto the stack.
18270 Note - we only need to tell the dwarf2 backend about the SP
18271 adjustment in the second variant; the static chain register
18272 doesn't need to be unwound, as it doesn't contain a value
18273 inherited from the caller. */
18275 if (df_regs_ever_live_p (3) == false)
18276 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
18277 else if (args_to_push == 0)
18279 rtx dwarf;
18281 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
18282 saved_regs += 4;
18284 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
18285 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
18286 fp_offset = 4;
18288 /* Just tell the dwarf backend that we adjusted SP. */
18289 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18290 plus_constant (Pmode, stack_pointer_rtx,
18291 -fp_offset));
18292 RTX_FRAME_RELATED_P (insn) = 1;
18293 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
18295 else
18297 /* Store the args on the stack. */
18298 if (cfun->machine->uses_anonymous_args)
18299 insn = emit_multi_reg_push
18300 ((0xf0 >> (args_to_push / 4)) & 0xf);
18301 else
18302 insn = emit_insn
18303 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18304 GEN_INT (- args_to_push)));
18306 RTX_FRAME_RELATED_P (insn) = 1;
18308 saved_pretend_args = 1;
18309 fp_offset = args_to_push;
18310 args_to_push = 0;
18312 /* Now reuse r3 to preserve IP. */
18313 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
18317 insn = emit_set_insn (ip_rtx,
18318 plus_constant (Pmode, stack_pointer_rtx,
18319 fp_offset));
18320 RTX_FRAME_RELATED_P (insn) = 1;
18323 if (args_to_push)
18325 /* Push the argument registers, or reserve space for them. */
18326 if (cfun->machine->uses_anonymous_args)
18327 insn = emit_multi_reg_push
18328 ((0xf0 >> (args_to_push / 4)) & 0xf);
18329 else
18330 insn = emit_insn
18331 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18332 GEN_INT (- args_to_push)));
18333 RTX_FRAME_RELATED_P (insn) = 1;
18336 /* If this is an interrupt service routine, and the link register
18337 is going to be pushed, and we're not generating an extra
18338 push of IP (needed when a frame is needed and the frame layout is APCS),
18339 subtracting four from LR now will mean that the function return
18340 can be done with a single instruction. */
18341 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
18342 && (live_regs_mask & (1 << LR_REGNUM)) != 0
18343 && !(frame_pointer_needed && TARGET_APCS_FRAME)
18344 && TARGET_ARM)
18346 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
18348 emit_set_insn (lr, plus_constant (SImode, lr, -4));
18351 if (live_regs_mask)
18353 saved_regs += bit_count (live_regs_mask) * 4;
18354 if (optimize_size && !frame_pointer_needed
18355 && saved_regs == offsets->saved_regs - offsets->saved_args)
18357 /* If no coprocessor registers are being pushed and we don't have
18358 to worry about a frame pointer then push extra registers to
18359 create the stack frame. This is done in a way that does not
18360 alter the frame layout, so is independent of the epilogue. */
18361 int n;
18362 int frame;
18363 n = 0;
18364 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
18365 n++;
18366 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
18367 if (frame && n * 4 >= frame)
18369 n = frame / 4;
18370 live_regs_mask |= (1 << n) - 1;
18371 saved_regs += frame;
18375 if (TARGET_LDRD
18376 && current_tune->prefer_ldrd_strd
18377 && !optimize_function_for_size_p (cfun))
18379 if (TARGET_THUMB2)
18381 thumb2_emit_strd_push (live_regs_mask);
18383 else if (TARGET_ARM
18384 && !TARGET_APCS_FRAME
18385 && !IS_INTERRUPT (func_type))
18387 arm_emit_strd_push (live_regs_mask);
18389 else
18391 insn = emit_multi_reg_push (live_regs_mask);
18392 RTX_FRAME_RELATED_P (insn) = 1;
18395 else
18397 insn = emit_multi_reg_push (live_regs_mask);
18398 RTX_FRAME_RELATED_P (insn) = 1;
18402 if (! IS_VOLATILE (func_type))
18403 saved_regs += arm_save_coproc_regs ();
18405 if (frame_pointer_needed && TARGET_ARM)
18407 /* Create the new frame pointer. */
18408 if (TARGET_APCS_FRAME)
18410 insn = GEN_INT (-(4 + args_to_push + fp_offset));
18411 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
18412 RTX_FRAME_RELATED_P (insn) = 1;
18414 if (IS_NESTED (func_type))
18416 /* Recover the static chain register. */
18417 if (!df_regs_ever_live_p (3)
18418 || saved_pretend_args)
18419 insn = gen_rtx_REG (SImode, 3);
18420 else /* if (crtl->args.pretend_args_size == 0) */
18422 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
18423 insn = gen_frame_mem (SImode, insn);
18425 emit_set_insn (ip_rtx, insn);
18426 /* Add a USE to stop propagate_one_insn() from barfing. */
18427 emit_insn (gen_force_register_use (ip_rtx));
18430 else
18432 insn = GEN_INT (saved_regs - 4);
18433 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
18434 stack_pointer_rtx, insn));
18435 RTX_FRAME_RELATED_P (insn) = 1;
18439 if (flag_stack_usage_info)
18440 current_function_static_stack_size
18441 = offsets->outgoing_args - offsets->saved_args;
18443 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
18445 /* This add can produce multiple insns for a large constant, so we
18446 need to get tricky. */
18447 rtx last = get_last_insn ();
18449 amount = GEN_INT (offsets->saved_args + saved_regs
18450 - offsets->outgoing_args);
18452 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
18453 amount));
18456 last = last ? NEXT_INSN (last) : get_insns ();
18457 RTX_FRAME_RELATED_P (last) = 1;
18459 while (last != insn);
18461 /* If the frame pointer is needed, emit a special barrier that
18462 will prevent the scheduler from moving stores to the frame
18463 before the stack adjustment. */
18464 if (frame_pointer_needed)
18465 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
18466 hard_frame_pointer_rtx));
18470 if (frame_pointer_needed && TARGET_THUMB2)
18471 thumb_set_frame_pointer (offsets);
18473 if (flag_pic && arm_pic_register != INVALID_REGNUM)
18475 unsigned long mask;
18477 mask = live_regs_mask;
18478 mask &= THUMB2_WORK_REGS;
18479 if (!IS_NESTED (func_type))
18480 mask |= (1 << IP_REGNUM);
18481 arm_load_pic_register (mask);
18484 /* If we are profiling, make sure no instructions are scheduled before
18485 the call to mcount. Similarly if the user has requested no
18486 scheduling in the prolog. Similarly if we want non-call exceptions
18487 using the EABI unwinder, to prevent faulting instructions from being
18488 swapped with a stack adjustment. */
18489 if (crtl->profile || !TARGET_SCHED_PROLOG
18490 || (arm_except_unwind_info (&global_options) == UI_TARGET
18491 && cfun->can_throw_non_call_exceptions))
18492 emit_insn (gen_blockage ());
18494 /* If the link register is being kept alive, with the return address in it,
18495 then make sure that it does not get reused by the ce2 pass. */
18496 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
18497 cfun->machine->lr_save_eliminated = 1;
18500 /* Print condition code to STREAM. Helper function for arm_print_operand. */
18501 static void
18502 arm_print_condition (FILE *stream)
18504 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
18506 /* Branch conversion is not implemented for Thumb-2. */
18507 if (TARGET_THUMB)
18509 output_operand_lossage ("predicated Thumb instruction");
18510 return;
18512 if (current_insn_predicate != NULL)
18514 output_operand_lossage
18515 ("predicated instruction in conditional sequence");
18516 return;
18519 fputs (arm_condition_codes[arm_current_cc], stream);
18521 else if (current_insn_predicate)
18523 enum arm_cond_code code;
18525 if (TARGET_THUMB1)
18527 output_operand_lossage ("predicated Thumb instruction");
18528 return;
18531 code = get_arm_condition_code (current_insn_predicate);
18532 fputs (arm_condition_codes[code], stream);
18537 /* If CODE is 'd', then the X is a condition operand and the instruction
18538 should only be executed if the condition is true.
18539 if CODE is 'D', then the X is a condition operand and the instruction
18540 should only be executed if the condition is false: however, if the mode
18541 of the comparison is CCFPEmode, then always execute the instruction -- we
18542 do this because in these circumstances !GE does not necessarily imply LT;
18543 in these cases the instruction pattern will take care to make sure that
18544 an instruction containing %d will follow, thereby undoing the effects of
18545 doing this instruction unconditionally.
18546 If CODE is 'N' then X is a floating point operand that must be negated
18547 before output.
18548 If CODE is 'B' then output a bitwise inverted value of X (a const int).
18549 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
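/* For example, with a DImode value in r4, %M prints "{r4-r5}", the form
   expected inside an ldm/stm register list.  (Example only; see the
   individual cases below for the full set of codes.)  */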
18550 static void
18551 arm_print_operand (FILE *stream, rtx x, int code)
18553 switch (code)
18555 case '@':
18556 fputs (ASM_COMMENT_START, stream);
18557 return;
18559 case '_':
18560 fputs (user_label_prefix, stream);
18561 return;
18563 case '|':
18564 fputs (REGISTER_PREFIX, stream);
18565 return;
18567 case '?':
18568 arm_print_condition (stream);
18569 return;
18571 case '(':
18572 /* Nothing in unified syntax, otherwise the current condition code. */
18573 if (!TARGET_UNIFIED_ASM)
18574 arm_print_condition (stream);
18575 break;
18577 case ')':
18578 /* The current condition code in unified syntax, otherwise nothing. */
18579 if (TARGET_UNIFIED_ASM)
18580 arm_print_condition (stream);
18581 break;
18583 case '.':
18584 /* The current condition code for a condition code setting instruction.
18585 Preceded by 's' in unified syntax, otherwise followed by 's'. */
18586 if (TARGET_UNIFIED_ASM)
18588 fputc('s', stream);
18589 arm_print_condition (stream);
18591 else
18593 arm_print_condition (stream);
18594 fputc('s', stream);
18596 return;
18598 case '!':
18599 /* If the instruction is conditionally executed then print
18600 the current condition code, otherwise print 's'. */
18601 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
18602 if (current_insn_predicate)
18603 arm_print_condition (stream);
18604 else
18605 fputc('s', stream);
18606 break;
18608 /* %# is a "break" sequence. It doesn't output anything, but is used to
18609 separate e.g. operand numbers from following text, if that text consists
18610 of further digits which we don't want to be part of the operand
18611 number. */
18612 case '#':
18613 return;
18615 case 'N':
18617 REAL_VALUE_TYPE r;
18618 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
18619 r = real_value_negate (&r);
18620 fprintf (stream, "%s", fp_const_from_val (&r));
18622 return;
18624 /* An integer or symbol address without a preceding # sign. */
18625 case 'c':
18626 switch (GET_CODE (x))
18628 case CONST_INT:
18629 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
18630 break;
18632 case SYMBOL_REF:
18633 output_addr_const (stream, x);
18634 break;
18636 case CONST:
18637 if (GET_CODE (XEXP (x, 0)) == PLUS
18638 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
18640 output_addr_const (stream, x);
18641 break;
18643 /* Fall through. */
18645 default:
18646 output_operand_lossage ("Unsupported operand for code '%c'", code);
18648 return;
18650 /* An integer that we want to print in HEX. */
18651 case 'x':
18652 switch (GET_CODE (x))
18654 case CONST_INT:
18655 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
18656 break;
18658 default:
18659 output_operand_lossage ("Unsupported operand for code '%c'", code);
18661 return;
18663 case 'B':
18664 if (CONST_INT_P (x))
18666 HOST_WIDE_INT val;
18667 val = ARM_SIGN_EXTEND (~INTVAL (x));
18668 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
18670 else
18672 putc ('~', stream);
18673 output_addr_const (stream, x);
18675 return;
18677 case 'L':
18678 /* The low 16 bits of an immediate constant. */
18679 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
18680 return;
18682 case 'i':
18683 fprintf (stream, "%s", arithmetic_instr (x, 1));
18684 return;
18686 case 'I':
18687 fprintf (stream, "%s", arithmetic_instr (x, 0));
18688 return;
18690 case 'S':
18692 HOST_WIDE_INT val;
18693 const char *shift;
18695 shift = shift_op (x, &val);
18697 if (shift)
18699 fprintf (stream, ", %s ", shift);
18700 if (val == -1)
18701 arm_print_operand (stream, XEXP (x, 1), 0);
18702 else
18703 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
18706 return;
18708 /* An explanation of the 'Q', 'R' and 'H' register operands:
18710 In a pair of registers containing a DI or DF value the 'Q'
18711 operand returns the register number of the register containing
18712 the least significant part of the value. The 'R' operand returns
18713 the register number of the register containing the most
18714 significant part of the value.
18716 The 'H' operand returns the higher of the two register numbers.
18717 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
18718 same as the 'Q' operand, since the most significant part of the
18719 value is held in the lower number register. The reverse is true
18720 on systems where WORDS_BIG_ENDIAN is false.
18722 The purpose of these operands is to distinguish between cases
18723 where the endian-ness of the values is important (for example
18724 when they are added together), and cases where the endian-ness
18725 is irrelevant, but the order of register operations is important.
18726 For example when loading a value from memory into a register
18727 pair, the endian-ness does not matter. Provided that the value
18728 from the lower memory address is put into the lower numbered
18729 register, and the value from the higher address is put into the
18730 higher numbered register, the load will work regardless of whether
18731 the value being loaded is big-wordian or little-wordian. The
18732 order of the two register loads can matter however, if the address
18733 of the memory location is actually held in one of the registers
18734 being overwritten by the load.
18736 The 'Q' and 'R' constraints are also available for 64-bit
18737 constants. */
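/* Illustrative example: for a DImode value held in {r0, r1} on a target
   where WORDS_BIG_ENDIAN is false, %Q prints r0, %R prints r1 and %H also
   prints r1; when WORDS_BIG_ENDIAN is true, %Q and %H print r1 while %R
   prints r0.  */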
18738 case 'Q':
18739 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
18741 rtx part = gen_lowpart (SImode, x);
18742 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
18743 return;
18746 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18748 output_operand_lossage ("invalid operand for code '%c'", code);
18749 return;
18752 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
18753 return;
18755 case 'R':
18756 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
18758 enum machine_mode mode = GET_MODE (x);
18759 rtx part;
18761 if (mode == VOIDmode)
18762 mode = DImode;
18763 part = gen_highpart_mode (SImode, mode, x);
18764 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
18765 return;
18768 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18770 output_operand_lossage ("invalid operand for code '%c'", code);
18771 return;
18774 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
18775 return;
18777 case 'H':
18778 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18780 output_operand_lossage ("invalid operand for code '%c'", code);
18781 return;
18784 asm_fprintf (stream, "%r", REGNO (x) + 1);
18785 return;
18787 case 'J':
18788 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18790 output_operand_lossage ("invalid operand for code '%c'", code);
18791 return;
18794 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
18795 return;
18797 case 'K':
18798 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
18800 output_operand_lossage ("invalid operand for code '%c'", code);
18801 return;
18804 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
18805 return;
18807 case 'm':
18808 asm_fprintf (stream, "%r",
18809 REG_P (XEXP (x, 0))
18810 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
18811 return;
18813 case 'M':
18814 asm_fprintf (stream, "{%r-%r}",
18815 REGNO (x),
18816 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
18817 return;
18819 /* Like 'M', but writing doubleword vector registers, for use by Neon
18820 insns. */
18821 case 'h':
18823 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
18824 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
18825 if (numregs == 1)
18826 asm_fprintf (stream, "{d%d}", regno);
18827 else
18828 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
18830 return;
18832 case 'd':
18833 /* CONST_TRUE_RTX means always -- that's the default. */
18834 if (x == const_true_rtx)
18835 return;
18837 if (!COMPARISON_P (x))
18839 output_operand_lossage ("invalid operand for code '%c'", code);
18840 return;
18843 fputs (arm_condition_codes[get_arm_condition_code (x)],
18844 stream);
18845 return;
18847 case 'D':
18848 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
18849 want to do that. */
18850 if (x == const_true_rtx)
18852 output_operand_lossage ("instruction never executed");
18853 return;
18855 if (!COMPARISON_P (x))
18857 output_operand_lossage ("invalid operand for code '%c'", code);
18858 return;
18861 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
18862 (get_arm_condition_code (x))],
18863 stream);
18864 return;
18866 case 's':
18867 case 'V':
18868 case 'W':
18869 case 'X':
18870 case 'Y':
18871 case 'Z':
18872 /* Former Maverick support, removed after GCC-4.7. */
18873 output_operand_lossage ("obsolete Maverick format code '%c'", code);
18874 return;
18876 case 'U':
18877 if (!REG_P (x)
18878 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
18879 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
18880 /* Bad value for wCG register number. */
18882 output_operand_lossage ("invalid operand for code '%c'", code);
18883 return;
18886 else
18887 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
18888 return;
18890 /* Print an iWMMXt control register name. */
18891 case 'w':
18892 if (!CONST_INT_P (x)
18893 || INTVAL (x) < 0
18894 || INTVAL (x) >= 16)
18895 /* Bad value for wC register number. */
18897 output_operand_lossage ("invalid operand for code '%c'", code);
18898 return;
18901 else
18903 static const char * wc_reg_names [16] =
18905 "wCID", "wCon", "wCSSF", "wCASF",
18906 "wC4", "wC5", "wC6", "wC7",
18907 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
18908 "wC12", "wC13", "wC14", "wC15"
18911 fputs (wc_reg_names [INTVAL (x)], stream);
18913 return;
18915 /* Print the high single-precision register of a VFP double-precision
18916 register. */
18917 case 'p':
18919 int mode = GET_MODE (x);
18920 int regno;
18922 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
18924 output_operand_lossage ("invalid operand for code '%c'", code);
18925 return;
18928 regno = REGNO (x);
18929 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
18931 output_operand_lossage ("invalid operand for code '%c'", code);
18932 return;
18935 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
18937 return;
18939 /* Print a VFP/Neon double precision or quad precision register name. */
18940 case 'P':
18941 case 'q':
18943 int mode = GET_MODE (x);
18944 int is_quad = (code == 'q');
18945 int regno;
18947 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
18949 output_operand_lossage ("invalid operand for code '%c'", code);
18950 return;
18953 if (!REG_P (x)
18954 || !IS_VFP_REGNUM (REGNO (x)))
18956 output_operand_lossage ("invalid operand for code '%c'", code);
18957 return;
18960 regno = REGNO (x);
18961 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
18962 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
18964 output_operand_lossage ("invalid operand for code '%c'", code);
18965 return;
18968 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
18969 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
18971 return;
18973 /* These two codes print the low/high doubleword register of a Neon quad
18974 register, respectively. For pair-structure types, can also print
18975 low/high quadword registers. */
18976 case 'e':
18977 case 'f':
18979 int mode = GET_MODE (x);
18980 int regno;
18982 if ((GET_MODE_SIZE (mode) != 16
18983 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
18985 output_operand_lossage ("invalid operand for code '%c'", code);
18986 return;
18989 regno = REGNO (x);
18990 if (!NEON_REGNO_OK_FOR_QUAD (regno))
18992 output_operand_lossage ("invalid operand for code '%c'", code);
18993 return;
18996 if (GET_MODE_SIZE (mode) == 16)
18997 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
18998 + (code == 'f' ? 1 : 0));
18999 else
19000 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
19001 + (code == 'f' ? 1 : 0));
19003 return;
19005 /* Print a VFPv3 floating-point constant, represented as an integer
19006 index. */
19007 case 'G':
19009 int index = vfp3_const_double_index (x);
19010 gcc_assert (index != -1);
19011 fprintf (stream, "%d", index);
19013 return;
19015 /* Print bits representing opcode features for Neon.
19017 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
19018 and polynomials as unsigned.
19020 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
19022 Bit 2 is 1 for rounding functions, 0 otherwise. */
19024 /* Identify the type as 's', 'u', 'p' or 'f'. */
19025 case 'T':
19027 HOST_WIDE_INT bits = INTVAL (x);
19028 fputc ("uspf"[bits & 3], stream);
19030 return;
19032 /* Likewise, but signed and unsigned integers are both 'i'. */
19033 case 'F':
19035 HOST_WIDE_INT bits = INTVAL (x);
19036 fputc ("iipf"[bits & 3], stream);
19038 return;
19040 /* As for 'T', but emit 'u' instead of 'p'. */
19041 case 't':
19043 HOST_WIDE_INT bits = INTVAL (x);
19044 fputc ("usuf"[bits & 3], stream);
19046 return;
19048 /* Bit 2: rounding (vs none). */
19049 case 'O':
19051 HOST_WIDE_INT bits = INTVAL (x);
19052 fputs ((bits & 4) != 0 ? "r" : "", stream);
19054 return;
19056 /* Memory operand for vld1/vst1 instruction. */
19057 case 'A':
19059 rtx addr;
19060 bool postinc = FALSE;
19061 unsigned align, memsize, align_bits;
19063 gcc_assert (MEM_P (x));
19064 addr = XEXP (x, 0);
19065 if (GET_CODE (addr) == POST_INC)
19067 postinc = 1;
19068 addr = XEXP (addr, 0);
19070 asm_fprintf (stream, "[%r", REGNO (addr));
19072 /* We know the alignment of this access, so we can emit a hint in the
19073 instruction (for some alignments) as an aid to the memory subsystem
19074 of the target. */
19075 align = MEM_ALIGN (x) >> 3;
19076 memsize = MEM_SIZE (x);
19078 /* Only certain alignment specifiers are supported by the hardware. */
19079 if (memsize == 32 && (align % 32) == 0)
19080 align_bits = 256;
19081 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
19082 align_bits = 128;
19083 else if (memsize >= 8 && (align % 8) == 0)
19084 align_bits = 64;
19085 else
19086 align_bits = 0;
19088 if (align_bits != 0)
19089 asm_fprintf (stream, ":%d", align_bits);
19091 asm_fprintf (stream, "]");
19093 if (postinc)
19094 fputs("!", stream);
19096 return;
19098 case 'C':
19100 rtx addr;
19102 gcc_assert (MEM_P (x));
19103 addr = XEXP (x, 0);
19104 gcc_assert (REG_P (addr));
19105 asm_fprintf (stream, "[%r]", REGNO (addr));
19107 return;
19109 /* Translate an S register number into a D register number and element index. */
19110 case 'y':
19112 int mode = GET_MODE (x);
19113 int regno;
19115 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
19117 output_operand_lossage ("invalid operand for code '%c'", code);
19118 return;
19121 regno = REGNO (x);
19122 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
19124 output_operand_lossage ("invalid operand for code '%c'", code);
19125 return;
19128 regno = regno - FIRST_VFP_REGNUM;
19129 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
19131 return;
19133 case 'v':
19134 gcc_assert (CONST_DOUBLE_P (x));
19135 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
19136 return;
19138 /* Register specifier for vld1.16/vst1.16. Translate the S register
19139 number into a D register number and element index. */
19140 case 'z':
19142 int mode = GET_MODE (x);
19143 int regno;
19145 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
19147 output_operand_lossage ("invalid operand for code '%c'", code);
19148 return;
19151 regno = REGNO (x);
19152 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
19154 output_operand_lossage ("invalid operand for code '%c'", code);
19155 return;
19158 regno = regno - FIRST_VFP_REGNUM;
19159 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
19161 return;
19163 default:
19164 if (x == 0)
19166 output_operand_lossage ("missing operand");
19167 return;
19170 switch (GET_CODE (x))
19172 case REG:
19173 asm_fprintf (stream, "%r", REGNO (x));
19174 break;
19176 case MEM:
19177 output_memory_reference_mode = GET_MODE (x);
19178 output_address (XEXP (x, 0));
19179 break;
19181 case CONST_DOUBLE:
19182 if (TARGET_NEON)
19184 char fpstr[20];
19185 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
19186 sizeof (fpstr), 0, 1);
19187 fprintf (stream, "#%s", fpstr);
19189 else
19190 fprintf (stream, "#%s", fp_immediate_constant (x));
19191 break;
19193 default:
19194 gcc_assert (GET_CODE (x) != NEG);
19195 fputc ('#', stream);
19196 if (GET_CODE (x) == HIGH)
19198 fputs (":lower16:", stream);
19199 x = XEXP (x, 0);
19202 output_addr_const (stream, x);
19203 break;
19208 /* Target hook for printing a memory address. */
19209 static void
19210 arm_print_operand_address (FILE *stream, rtx x)
19212 if (TARGET_32BIT)
19214 int is_minus = GET_CODE (x) == MINUS;
19216 if (REG_P (x))
19217 asm_fprintf (stream, "[%r]", REGNO (x));
19218 else if (GET_CODE (x) == PLUS || is_minus)
19220 rtx base = XEXP (x, 0);
19221 rtx index = XEXP (x, 1);
19222 HOST_WIDE_INT offset = 0;
19223 if (!REG_P (base)
19224 || (REG_P (index) && REGNO (index) == SP_REGNUM))
19226 /* Ensure that BASE is a register. */
19227 /* (one of them must be). */
19228 /* Also ensure the SP is not used as an index register. */
19229 rtx temp = base;
19230 base = index;
19231 index = temp;
19233 switch (GET_CODE (index))
19235 case CONST_INT:
19236 offset = INTVAL (index);
19237 if (is_minus)
19238 offset = -offset;
19239 asm_fprintf (stream, "[%r, #%wd]",
19240 REGNO (base), offset);
19241 break;
19243 case REG:
19244 asm_fprintf (stream, "[%r, %s%r]",
19245 REGNO (base), is_minus ? "-" : "",
19246 REGNO (index));
19247 break;
19249 case MULT:
19250 case ASHIFTRT:
19251 case LSHIFTRT:
19252 case ASHIFT:
19253 case ROTATERT:
19255 asm_fprintf (stream, "[%r, %s%r",
19256 REGNO (base), is_minus ? "-" : "",
19257 REGNO (XEXP (index, 0)));
19258 arm_print_operand (stream, index, 'S');
19259 fputs ("]", stream);
19260 break;
19263 default:
19264 gcc_unreachable ();
19267 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
19268 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
19270 extern enum machine_mode output_memory_reference_mode;
19272 gcc_assert (REG_P (XEXP (x, 0)));
19274 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
19275 asm_fprintf (stream, "[%r, #%s%d]!",
19276 REGNO (XEXP (x, 0)),
19277 GET_CODE (x) == PRE_DEC ? "-" : "",
19278 GET_MODE_SIZE (output_memory_reference_mode));
19279 else
19280 asm_fprintf (stream, "[%r], #%s%d",
19281 REGNO (XEXP (x, 0)),
19282 GET_CODE (x) == POST_DEC ? "-" : "",
19283 GET_MODE_SIZE (output_memory_reference_mode));
19285 else if (GET_CODE (x) == PRE_MODIFY)
19287 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
19288 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
19289 asm_fprintf (stream, "#%wd]!",
19290 INTVAL (XEXP (XEXP (x, 1), 1)));
19291 else
19292 asm_fprintf (stream, "%r]!",
19293 REGNO (XEXP (XEXP (x, 1), 1)));
19295 else if (GET_CODE (x) == POST_MODIFY)
19297 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
19298 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
19299 asm_fprintf (stream, "#%wd",
19300 INTVAL (XEXP (XEXP (x, 1), 1)));
19301 else
19302 asm_fprintf (stream, "%r",
19303 REGNO (XEXP (XEXP (x, 1), 1)));
19305 else output_addr_const (stream, x);
19307 else
19309 if (REG_P (x))
19310 asm_fprintf (stream, "[%r]", REGNO (x));
19311 else if (GET_CODE (x) == POST_INC)
19312 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
19313 else if (GET_CODE (x) == PLUS)
19315 gcc_assert (REG_P (XEXP (x, 0)));
19316 if (CONST_INT_P (XEXP (x, 1)))
19317 asm_fprintf (stream, "[%r, #%wd]",
19318 REGNO (XEXP (x, 0)),
19319 INTVAL (XEXP (x, 1)));
19320 else
19321 asm_fprintf (stream, "[%r, %r]",
19322 REGNO (XEXP (x, 0)),
19323 REGNO (XEXP (x, 1)));
19325 else
19326 output_addr_const (stream, x);
19330 /* Target hook for indicating whether a punctuation character for
19331 TARGET_PRINT_OPERAND is valid. */
19332 static bool
19333 arm_print_operand_punct_valid_p (unsigned char code)
19335 return (code == '@' || code == '|' || code == '.'
19336 || code == '(' || code == ')' || code == '#'
19337 || (TARGET_32BIT && (code == '?'))
19338 || (TARGET_THUMB2 && (code == '!'))
19339 || (TARGET_THUMB && (code == '_')));
19342 /* Target hook for assembling integer objects. The ARM version needs to
19343 handle word-sized values specially. */
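/* For instance (illustrative), when -fPIC is in effect and a constant table
   entry is being written in .text, a reference to a non-local symbol `foo'
   comes out as ".word foo(GOT)" while a local symbol comes out as
   ".word foo(GOTOFF)"; see the NEED_GOT_RELOC handling below.  */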
19344 static bool
19345 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
19347 enum machine_mode mode;
19349 if (size == UNITS_PER_WORD && aligned_p)
19351 fputs ("\t.word\t", asm_out_file);
19352 output_addr_const (asm_out_file, x);
19354 /* Mark symbols as position independent. We only do this in the
19355 .text segment, not in the .data segment. */
19356 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
19357 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
19359 /* See legitimize_pic_address for an explanation of the
19360 TARGET_VXWORKS_RTP check. */
19361 if (TARGET_VXWORKS_RTP
19362 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
19363 fputs ("(GOT)", asm_out_file);
19364 else
19365 fputs ("(GOTOFF)", asm_out_file);
19367 fputc ('\n', asm_out_file);
19368 return true;
19371 mode = GET_MODE (x);
19373 if (arm_vector_mode_supported_p (mode))
19375 int i, units;
19377 gcc_assert (GET_CODE (x) == CONST_VECTOR);
19379 units = CONST_VECTOR_NUNITS (x);
19380 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
19382 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19383 for (i = 0; i < units; i++)
19385 rtx elt = CONST_VECTOR_ELT (x, i);
19386 assemble_integer
19387 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
19389 else
19390 for (i = 0; i < units; i++)
19392 rtx elt = CONST_VECTOR_ELT (x, i);
19393 REAL_VALUE_TYPE rval;
19395 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
19397 assemble_real
19398 (rval, GET_MODE_INNER (mode),
19399 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
19402 return true;
19405 return default_assemble_integer (x, size, aligned_p);
19408 static void
19409 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
19411 section *s;
19413 if (!TARGET_AAPCS_BASED)
19415 (is_ctor ?
19416 default_named_section_asm_out_constructor
19417 : default_named_section_asm_out_destructor) (symbol, priority);
19418 return;
19421 /* Put these in the .init_array section, using a special relocation. */
19422 if (priority != DEFAULT_INIT_PRIORITY)
19424 char buf[18];
19425 sprintf (buf, "%s.%.5u",
19426 is_ctor ? ".init_array" : ".fini_array",
19427 priority);
19428 s = get_section (buf, SECTION_WRITE, NULL_TREE);
19430 else if (is_ctor)
19431 s = ctors_section;
19432 else
19433 s = dtors_section;
19435 switch_to_section (s);
19436 assemble_align (POINTER_SIZE);
19437 fputs ("\t.word\t", asm_out_file);
19438 output_addr_const (asm_out_file, symbol);
19439 fputs ("(target1)\n", asm_out_file);
19442 /* Add a function to the list of static constructors. */
19444 static void
19445 arm_elf_asm_constructor (rtx symbol, int priority)
19447 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
19450 /* Add a function to the list of static destructors. */
19452 static void
19453 arm_elf_asm_destructor (rtx symbol, int priority)
19455 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
19458 /* A finite state machine takes care of noticing whether or not instructions
19459 can be conditionally executed, thus decreasing execution time and code
19460 size by deleting branch instructions. The fsm is controlled by
19461 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
19463 /* The states of the fsm controlling condition codes are:
19464 0: normal, do nothing special
19465 1: make ASM_OUTPUT_OPCODE not output this instruction
19466 2: make ASM_OUTPUT_OPCODE not output this instruction
19467 3: make instructions conditional
19468 4: make instructions conditional
19470 State transitions (state->state by whom under condition):
19471 0 -> 1 final_prescan_insn if the `target' is a label
19472 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
19473 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
19474 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
19475 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
19476 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
19477 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
19478 (the target insn is arm_target_insn).
19480 If the jump clobbers the conditions then we use states 2 and 4.
19482 A similar thing can be done with conditional return insns.
19484 XXX In case the `target' is an unconditional branch, this conditionalising
19485 of the instructions always reduces code size, but not always execution
19486 time. But then, I want to reduce the code size to somewhere near what
19487 /bin/cc produces. */
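/* As an illustration (hand-written, not actual compiler output), the fsm
   lets a short forward branch such as

        cmp     r0, #0
        beq     .L1
        add     r1, r1, #1
        sub     r2, r2, #1
   .L1:

   be emitted instead as straight-line conditional execution

        cmp     r0, #0
        addne   r1, r1, #1
        subne   r2, r2, #1

   removing the branch entirely.  */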
19489 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
19490 instructions. When a COND_EXEC instruction is seen the subsequent
19491 instructions are scanned so that multiple conditional instructions can be
19492 combined into a single IT block. arm_condexec_count and arm_condexec_mask
19493 specify the length and true/false mask for the IT block. These will be
19494 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
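/* For example (illustrative only), two instructions executed on EQ followed
   by one on NE can share a single IT block in Thumb-2:

        itte    eq
        moveq   r0, #1
        addeq   r1, r1, #4
        movne   r0, #0

   for which arm_condexec_masklen would be 3 and arm_condexec_mask 0b011
   (a set bit meaning "same condition as the block's leading condition").  */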
19496 /* Returns the index of the ARM condition code string in
19497 `arm_condition_codes', or ARM_NV if the comparison is invalid.
19498 COMPARISON should be an rtx like `(eq (...) (...))'. */
19500 enum arm_cond_code
19501 maybe_get_arm_condition_code (rtx comparison)
19503 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
19504 enum arm_cond_code code;
19505 enum rtx_code comp_code = GET_CODE (comparison);
19507 if (GET_MODE_CLASS (mode) != MODE_CC)
19508 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
19509 XEXP (comparison, 1));
19511 switch (mode)
19513 case CC_DNEmode: code = ARM_NE; goto dominance;
19514 case CC_DEQmode: code = ARM_EQ; goto dominance;
19515 case CC_DGEmode: code = ARM_GE; goto dominance;
19516 case CC_DGTmode: code = ARM_GT; goto dominance;
19517 case CC_DLEmode: code = ARM_LE; goto dominance;
19518 case CC_DLTmode: code = ARM_LT; goto dominance;
19519 case CC_DGEUmode: code = ARM_CS; goto dominance;
19520 case CC_DGTUmode: code = ARM_HI; goto dominance;
19521 case CC_DLEUmode: code = ARM_LS; goto dominance;
19522 case CC_DLTUmode: code = ARM_CC;
19524 dominance:
19525 if (comp_code == EQ)
19526 return ARM_INVERSE_CONDITION_CODE (code);
19527 if (comp_code == NE)
19528 return code;
19529 return ARM_NV;
19531 case CC_NOOVmode:
19532 switch (comp_code)
19534 case NE: return ARM_NE;
19535 case EQ: return ARM_EQ;
19536 case GE: return ARM_PL;
19537 case LT: return ARM_MI;
19538 default: return ARM_NV;
19541 case CC_Zmode:
19542 switch (comp_code)
19544 case NE: return ARM_NE;
19545 case EQ: return ARM_EQ;
19546 default: return ARM_NV;
19549 case CC_Nmode:
19550 switch (comp_code)
19552 case NE: return ARM_MI;
19553 case EQ: return ARM_PL;
19554 default: return ARM_NV;
19557 case CCFPEmode:
19558 case CCFPmode:
19559 /* We can handle all cases except UNEQ and LTGT. */
19560 switch (comp_code)
19562 case GE: return ARM_GE;
19563 case GT: return ARM_GT;
19564 case LE: return ARM_LS;
19565 case LT: return ARM_MI;
19566 case NE: return ARM_NE;
19567 case EQ: return ARM_EQ;
19568 case ORDERED: return ARM_VC;
19569 case UNORDERED: return ARM_VS;
19570 case UNLT: return ARM_LT;
19571 case UNLE: return ARM_LE;
19572 case UNGT: return ARM_HI;
19573 case UNGE: return ARM_PL;
19574 /* UNEQ and LTGT do not have a representation. */
19575 case UNEQ: /* Fall through. */
19576 case LTGT: /* Fall through. */
19577 default: return ARM_NV;
19580 case CC_SWPmode:
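/* The flags were set from the comparison with its operands swapped, so each
   ordering test below maps to the mirrored condition (GT is tested as LT,
   GEU as LS, and so on); EQ and NE are unaffected.  */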
19581 switch (comp_code)
19583 case NE: return ARM_NE;
19584 case EQ: return ARM_EQ;
19585 case GE: return ARM_LE;
19586 case GT: return ARM_LT;
19587 case LE: return ARM_GE;
19588 case LT: return ARM_GT;
19589 case GEU: return ARM_LS;
19590 case GTU: return ARM_CC;
19591 case LEU: return ARM_CS;
19592 case LTU: return ARM_HI;
19593 default: return ARM_NV;
19596 case CC_Cmode:
19597 switch (comp_code)
19599 case LTU: return ARM_CS;
19600 case GEU: return ARM_CC;
19601 default: return ARM_NV;
19604 case CC_CZmode:
19605 switch (comp_code)
19607 case NE: return ARM_NE;
19608 case EQ: return ARM_EQ;
19609 case GEU: return ARM_CS;
19610 case GTU: return ARM_HI;
19611 case LEU: return ARM_LS;
19612 case LTU: return ARM_CC;
19613 default: return ARM_NV;
19616 case CC_NCVmode:
19617 switch (comp_code)
19619 case GE: return ARM_GE;
19620 case LT: return ARM_LT;
19621 case GEU: return ARM_CS;
19622 case LTU: return ARM_CC;
19623 default: return ARM_NV;
19626 case CCmode:
19627 switch (comp_code)
19629 case NE: return ARM_NE;
19630 case EQ: return ARM_EQ;
19631 case GE: return ARM_GE;
19632 case GT: return ARM_GT;
19633 case LE: return ARM_LE;
19634 case LT: return ARM_LT;
19635 case GEU: return ARM_CS;
19636 case GTU: return ARM_HI;
19637 case LEU: return ARM_LS;
19638 case LTU: return ARM_CC;
19639 default: return ARM_NV;
19642 default: gcc_unreachable ();
19646 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
19647 static enum arm_cond_code
19648 get_arm_condition_code (rtx comparison)
19650 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
19651 gcc_assert (code != ARM_NV);
19652 return code;
19655 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
19656 instructions. */
19657 void
19658 thumb2_final_prescan_insn (rtx insn)
19660 rtx first_insn = insn;
19661 rtx body = PATTERN (insn);
19662 rtx predicate;
19663 enum arm_cond_code code;
19664 int n;
19665 int mask;
19666 int max;
19668 /* The maximum number of conditionally executed instructions in a block
19669 is the minimum of two limits: the maximum allowed in an IT block
19670 and the maximum that is beneficial according to the cost model and tuning. */
19671 max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ?
19672 max_insns_skipped : MAX_INSN_PER_IT_BLOCK;
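/* Architecturally an IT block can cover at most four following instructions,
   which is what MAX_INSN_PER_IT_BLOCK is expected to encode; max_insns_skipped
   may lower the limit further for tuning reasons.  */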
19674 /* Remove the previous insn from the count of insns to be output. */
19675 if (arm_condexec_count)
19676 arm_condexec_count--;
19678 /* Nothing to do if we are already inside a conditional block. */
19679 if (arm_condexec_count)
19680 return;
19682 if (GET_CODE (body) != COND_EXEC)
19683 return;
19685 /* Conditional jumps are implemented directly. */
19686 if (JUMP_P (insn))
19687 return;
19689 predicate = COND_EXEC_TEST (body);
19690 arm_current_cc = get_arm_condition_code (predicate);
19692 n = get_attr_ce_count (insn);
19693 arm_condexec_count = 1;
19694 arm_condexec_mask = (1 << n) - 1;
19695 arm_condexec_masklen = n;
19696 /* See if subsequent instructions can be combined into the same block. */
19697 for (;;)
19699 insn = next_nonnote_insn (insn);
19701 /* Jumping into the middle of an IT block is illegal, so a label or
19702 barrier terminates the block. */
19703 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
19704 break;
19706 body = PATTERN (insn);
19707 /* USE and CLOBBER aren't really insns, so just skip them. */
19708 if (GET_CODE (body) == USE
19709 || GET_CODE (body) == CLOBBER)
19710 continue;
19712 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
19713 if (GET_CODE (body) != COND_EXEC)
19714 break;
19715 /* Maximum number of conditionally executed instructions in a block. */
19716 n = get_attr_ce_count (insn);
19717 if (arm_condexec_masklen + n > max)
19718 break;
19720 predicate = COND_EXEC_TEST (body);
19721 code = get_arm_condition_code (predicate);
19722 mask = (1 << n) - 1;
19723 if (arm_current_cc == code)
19724 arm_condexec_mask |= (mask << arm_condexec_masklen);
19725 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
19726 break;
19728 arm_condexec_count++;
19729 arm_condexec_masklen += n;
19731 /* A jump must be the last instruction in a conditional block. */
19732 if (JUMP_P (insn))
19733 break;
19735 /* Restore recog_data (getting the attributes of other insns can
19736 destroy this array, but final.c assumes that it remains intact
19737 across this call). */
19738 extract_constrain_insn_cached (first_insn);
19741 void
19742 arm_final_prescan_insn (rtx insn)
19744 /* BODY will hold the body of INSN. */
19745 rtx body = PATTERN (insn);
19747 /* This will be 1 if trying to repeat the trick, and things need to be
19748 reversed if it appears to fail. */
19749 int reverse = 0;
19751 /* If we start with a return insn, we only succeed if we find another one. */
19752 int seeking_return = 0;
19753 enum rtx_code return_code = UNKNOWN;
19755 /* START_INSN will hold the insn from where we start looking. This is the
19756 first insn after the following code_label if REVERSE is true. */
19757 rtx start_insn = insn;
19759 /* If in state 4, check if the target branch is reached, in order to
19760 change back to state 0. */
19761 if (arm_ccfsm_state == 4)
19763 if (insn == arm_target_insn)
19765 arm_target_insn = NULL;
19766 arm_ccfsm_state = 0;
19768 return;
19771 /* If in state 3, it is possible to repeat the trick, if this insn is an
19772 unconditional branch to a label, and immediately following this branch
19773 is the previous target label which is only used once, and the label this
19774 branch jumps to is not too far off. */
19775 if (arm_ccfsm_state == 3)
19777 if (simplejump_p (insn))
19779 start_insn = next_nonnote_insn (start_insn);
19780 if (BARRIER_P (start_insn))
19782 /* XXX Isn't this always a barrier? */
19783 start_insn = next_nonnote_insn (start_insn);
19785 if (LABEL_P (start_insn)
19786 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
19787 && LABEL_NUSES (start_insn) == 1)
19788 reverse = TRUE;
19789 else
19790 return;
19792 else if (ANY_RETURN_P (body))
19794 start_insn = next_nonnote_insn (start_insn);
19795 if (BARRIER_P (start_insn))
19796 start_insn = next_nonnote_insn (start_insn);
19797 if (LABEL_P (start_insn)
19798 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
19799 && LABEL_NUSES (start_insn) == 1)
19801 reverse = TRUE;
19802 seeking_return = 1;
19803 return_code = GET_CODE (body);
19805 else
19806 return;
19808 else
19809 return;
19812 gcc_assert (!arm_ccfsm_state || reverse);
19813 if (!JUMP_P (insn))
19814 return;
19816 /* This jump might be paralleled with a clobber of the condition codes;
19817 the jump should always come first.  */
19818 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
19819 body = XVECEXP (body, 0, 0);
19821 if (reverse
19822 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
19823 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
19825 int insns_skipped;
19826 int fail = FALSE, succeed = FALSE;
19827 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
19828 int then_not_else = TRUE;
19829 rtx this_insn = start_insn, label = 0;
19831 /* Register the insn jumped to. */
19832 if (reverse)
19834 if (!seeking_return)
19835 label = XEXP (SET_SRC (body), 0);
19837 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
19838 label = XEXP (XEXP (SET_SRC (body), 1), 0);
19839 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
19841 label = XEXP (XEXP (SET_SRC (body), 2), 0);
19842 then_not_else = FALSE;
19844 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
19846 seeking_return = 1;
19847 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
19849 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
19851 seeking_return = 1;
19852 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
19853 then_not_else = FALSE;
19855 else
19856 gcc_unreachable ();
19858 /* See how many insns this branch skips, and what kind of insns. If all
19859 insns are okay, and the label or unconditional branch to the same
19860 label is not too far away, succeed. */
19861 for (insns_skipped = 0;
19862 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
19864 rtx scanbody;
19866 this_insn = next_nonnote_insn (this_insn);
19867 if (!this_insn)
19868 break;
19870 switch (GET_CODE (this_insn))
19872 case CODE_LABEL:
19873 /* Succeed if it is the target label, otherwise fail since
19874 control falls in from somewhere else. */
19875 if (this_insn == label)
19877 arm_ccfsm_state = 1;
19878 succeed = TRUE;
19880 else
19881 fail = TRUE;
19882 break;
19884 case BARRIER:
19885 /* Succeed if the following insn is the target label.
19886 Otherwise fail.
19887 If return insns are used then the last insn in a function
19888 will be a barrier. */
19889 this_insn = next_nonnote_insn (this_insn);
19890 if (this_insn && this_insn == label)
19892 arm_ccfsm_state = 1;
19893 succeed = TRUE;
19895 else
19896 fail = TRUE;
19897 break;
19899 case CALL_INSN:
19900 /* The AAPCS says that conditional calls should not be
19901 used since they make interworking inefficient (the
19902 linker can't transform BL<cond> into BLX). That's
19903 only a problem if the machine has BLX. */
19904 if (arm_arch5)
19906 fail = TRUE;
19907 break;
19910 /* Succeed if the following insn is the target label, or
19911 if the following two insns are a barrier and the
19912 target label. */
19913 this_insn = next_nonnote_insn (this_insn);
19914 if (this_insn && BARRIER_P (this_insn))
19915 this_insn = next_nonnote_insn (this_insn);
19917 if (this_insn && this_insn == label
19918 && insns_skipped < max_insns_skipped)
19920 arm_ccfsm_state = 1;
19921 succeed = TRUE;
19923 else
19924 fail = TRUE;
19925 break;
19927 case JUMP_INSN:
19928 /* If this is an unconditional branch to the same label, succeed.
19929 If it is to another label, do nothing. If it is conditional,
19930 fail. */
19931 /* XXX Probably, the tests for SET and the PC are
19932 unnecessary. */
19934 scanbody = PATTERN (this_insn);
19935 if (GET_CODE (scanbody) == SET
19936 && GET_CODE (SET_DEST (scanbody)) == PC)
19938 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
19939 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
19941 arm_ccfsm_state = 2;
19942 succeed = TRUE;
19944 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
19945 fail = TRUE;
19947 /* Fail if a conditional return is undesirable (e.g. on a
19948 StrongARM), but still allow this if optimizing for size. */
19949 else if (GET_CODE (scanbody) == return_code
19950 && !use_return_insn (TRUE, NULL)
19951 && !optimize_size)
19952 fail = TRUE;
19953 else if (GET_CODE (scanbody) == return_code)
19955 arm_ccfsm_state = 2;
19956 succeed = TRUE;
19958 else if (GET_CODE (scanbody) == PARALLEL)
19960 switch (get_attr_conds (this_insn))
19962 case CONDS_NOCOND:
19963 break;
19964 default:
19965 fail = TRUE;
19966 break;
19969 else
19970 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
19972 break;
19974 case INSN:
19975 /* Instructions using or affecting the condition codes make it
19976 fail. */
19977 scanbody = PATTERN (this_insn);
19978 if (!(GET_CODE (scanbody) == SET
19979 || GET_CODE (scanbody) == PARALLEL)
19980 || get_attr_conds (this_insn) != CONDS_NOCOND)
19981 fail = TRUE;
19982 break;
19984 default:
19985 break;
19988 if (succeed)
19990 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
19991 arm_target_label = CODE_LABEL_NUMBER (label);
19992 else
19994 gcc_assert (seeking_return || arm_ccfsm_state == 2);
19996 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
19998 this_insn = next_nonnote_insn (this_insn);
19999 gcc_assert (!this_insn
20000 || (!BARRIER_P (this_insn)
20001 && !LABEL_P (this_insn)));
20003 if (!this_insn)
20005 /* Oh dear!  We ran off the end; give up.  */
20006 extract_constrain_insn_cached (insn);
20007 arm_ccfsm_state = 0;
20008 arm_target_insn = NULL;
20009 return;
20011 arm_target_insn = this_insn;
20014 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
20015 what it was. */
20016 if (!reverse)
20017 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
20019 if (reverse || then_not_else)
20020 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
20023 /* Restore recog_data (getting the attributes of other insns can
20024 destroy this array, but final.c assumes that it remains intact
20025 across this call).  */
20026 extract_constrain_insn_cached (insn);
20030 /* Output IT instructions. */
20031 void
20032 thumb2_asm_output_opcode (FILE * stream)
20034 char buff[5];
20035 int n;
20037 if (arm_condexec_mask)
20039 for (n = 0; n < arm_condexec_masklen; n++)
20040 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
20041 buff[n] = 0;
20042 asm_fprintf(stream, "i%s\t%s\n\t", buff,
20043 arm_condition_codes[arm_current_cc]);
20044 arm_condexec_mask = 0;
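/* Worked example (not generated output): with arm_current_cc == ARM_NE,
   arm_condexec_masklen == 3 and arm_condexec_mask == 0b101, the loop above
   builds buff = "tet" and the call prints "itet\tne", i.e. an IT block whose
   second instruction uses the inverse (EQ) condition.  */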
20048 /* Returns true if REGNO is a valid register
20049 for holding a quantity of type MODE. */
20051 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
20053 if (GET_MODE_CLASS (mode) == MODE_CC)
20054 return (regno == CC_REGNUM
20055 || (TARGET_HARD_FLOAT && TARGET_VFP
20056 && regno == VFPCC_REGNUM));
20058 if (TARGET_THUMB1)
20059 /* For the Thumb we only allow values bigger than SImode in
20060 registers 0 - 6, so that there is always a second low
20061 register available to hold the upper part of the value.
20062 We probably ought to ensure that the register is the
20063 start of an even numbered register pair. */
20064 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
20066 if (TARGET_HARD_FLOAT && TARGET_VFP
20067 && IS_VFP_REGNUM (regno))
20069 if (mode == SFmode || mode == SImode)
20070 return VFP_REGNO_OK_FOR_SINGLE (regno);
20072 if (mode == DFmode)
20073 return VFP_REGNO_OK_FOR_DOUBLE (regno);
20075 /* VFP registers can hold HFmode values, but there is no point in
20076 putting them there unless we have hardware conversion insns. */
20077 if (mode == HFmode)
20078 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
20080 if (TARGET_NEON)
20081 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
20082 || (VALID_NEON_QREG_MODE (mode)
20083 && NEON_REGNO_OK_FOR_QUAD (regno))
20084 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
20085 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
20086 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
20087 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
20088 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
20090 return FALSE;
20093 if (TARGET_REALLY_IWMMXT)
20095 if (IS_IWMMXT_GR_REGNUM (regno))
20096 return mode == SImode;
20098 if (IS_IWMMXT_REGNUM (regno))
20099 return VALID_IWMMXT_REG_MODE (mode);
20102 /* We allow almost any value to be stored in the general registers.
20103 Restrict doubleword quantities to even register pairs so that we can
20104 use ldrd. Do not allow very large Neon structure opaque modes in
20105 general registers; they would use too many. */
20106 if (regno <= LAST_ARM_REGNUM)
20107 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
20108 && ARM_NUM_REGS (mode) <= 4;
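/* For example, with TARGET_LDRD a DImode value may live in {r4, r5} (even
   first register, accessible with a single ldrd/strd) but may not start at
   r5, since ARM-state ldrd/strd need an even first register; without
   TARGET_LDRD any adjacent pair is accepted.  */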
20110 if (regno == FRAME_POINTER_REGNUM
20111 || regno == ARG_POINTER_REGNUM)
20112 /* We only allow integers in the fake hard registers. */
20113 return GET_MODE_CLASS (mode) == MODE_INT;
20115 return FALSE;
20118 /* Implement MODES_TIEABLE_P. */
20120 bool
20121 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20123 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
20124 return true;
20126 /* We specifically want to allow elements of "structure" modes to
20127 be tieable to the structure. This more general condition allows
20128 other rarer situations too. */
20129 if (TARGET_NEON
20130 && (VALID_NEON_DREG_MODE (mode1)
20131 || VALID_NEON_QREG_MODE (mode1)
20132 || VALID_NEON_STRUCT_MODE (mode1))
20133 && (VALID_NEON_DREG_MODE (mode2)
20134 || VALID_NEON_QREG_MODE (mode2)
20135 || VALID_NEON_STRUCT_MODE (mode2)))
20136 return true;
20138 return false;
20141 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
20142 not used in arm mode. */
20144 enum reg_class
20145 arm_regno_class (int regno)
20147 if (TARGET_THUMB1)
20149 if (regno == STACK_POINTER_REGNUM)
20150 return STACK_REG;
20151 if (regno == CC_REGNUM)
20152 return CC_REG;
20153 if (regno < 8)
20154 return LO_REGS;
20155 return HI_REGS;
20158 if (TARGET_THUMB2 && regno < 8)
20159 return LO_REGS;
20161 if ( regno <= LAST_ARM_REGNUM
20162 || regno == FRAME_POINTER_REGNUM
20163 || regno == ARG_POINTER_REGNUM)
20164 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
20166 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
20167 return TARGET_THUMB2 ? CC_REG : NO_REGS;
20169 if (IS_VFP_REGNUM (regno))
20171 if (regno <= D7_VFP_REGNUM)
20172 return VFP_D0_D7_REGS;
20173 else if (regno <= LAST_LO_VFP_REGNUM)
20174 return VFP_LO_REGS;
20175 else
20176 return VFP_HI_REGS;
20179 if (IS_IWMMXT_REGNUM (regno))
20180 return IWMMXT_REGS;
20182 if (IS_IWMMXT_GR_REGNUM (regno))
20183 return IWMMXT_GR_REGS;
20185 return NO_REGS;
20188 /* Handle a special case when computing the offset
20189 of an argument from the frame pointer. */
20191 arm_debugger_arg_offset (int value, rtx addr)
20193 rtx insn;
20195 /* We are only interested if dbxout_parms() failed to compute the offset. */
20196 if (value != 0)
20197 return 0;
20199 /* We can only cope with the case where the address is held in a register. */
20200 if (!REG_P (addr))
20201 return 0;
20203 /* If we are using the frame pointer to point at the argument, then
20204 an offset of 0 is correct. */
20205 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
20206 return 0;
20208 /* If we are using the stack pointer to point at the
20209 argument, then an offset of 0 is correct. */
20210 /* ??? Check this is consistent with thumb2 frame layout. */
20211 if ((TARGET_THUMB || !frame_pointer_needed)
20212 && REGNO (addr) == SP_REGNUM)
20213 return 0;
20215 /* Oh dear. The argument is pointed to by a register rather
20216 than being held in a register, or being stored at a known
20217 offset from the frame pointer. Since GDB only understands
20218 those two kinds of argument we must translate the address
20219 held in the register into an offset from the frame pointer.
20220 We do this by searching through the insns for the function
20221 looking to see where this register gets its value. If the
20222 register is initialized from the frame pointer plus an offset
20223 then we are in luck and we can continue, otherwise we give up.
20225 This code is exercised by producing debugging information
20226 for a function with arguments like this:
20228 double func (double a, double b, int c, double d) {return d;}
20230 Without this code the stab for parameter 'd' will be set to
20231 an offset of 0 from the frame pointer, rather than 8. */
20233 /* The if() statement says:
20235 If the insn is a normal instruction
20236 and if the insn is setting the value in a register
20237 and if the register being set is the register holding the address of the argument
20238 and if the address is computed by an addition
20239 that involves adding to a register
20240 which is the frame pointer
20241 a constant integer
20243 then... */
20245 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20247 if ( NONJUMP_INSN_P (insn)
20248 && GET_CODE (PATTERN (insn)) == SET
20249 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
20250 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
20251 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
20252 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
20253 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
20256 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
20258 break;
20262 if (value == 0)
20264 debug_rtx (addr);
20265 warning (0, "unable to compute real location of stacked parameter");
20266 value = 8; /* XXX magic hack */
20269 return value;
20272 typedef enum {
20273 T_V8QI,
20274 T_V4HI,
20275 T_V4HF,
20276 T_V2SI,
20277 T_V2SF,
20278 T_DI,
20279 T_V16QI,
20280 T_V8HI,
20281 T_V4SI,
20282 T_V4SF,
20283 T_V2DI,
20284 T_TI,
20285 T_EI,
20286 T_OI,
20287 T_MAX /* Size of enum. Keep last. */
20288 } neon_builtin_type_mode;
20290 #define TYPE_MODE_BIT(X) (1 << (X))
20292 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
20293 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
20294 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
20295 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
20296 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
20297 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
20299 #define v8qi_UP T_V8QI
20300 #define v4hi_UP T_V4HI
20301 #define v4hf_UP T_V4HF
20302 #define v2si_UP T_V2SI
20303 #define v2sf_UP T_V2SF
20304 #define di_UP T_DI
20305 #define v16qi_UP T_V16QI
20306 #define v8hi_UP T_V8HI
20307 #define v4si_UP T_V4SI
20308 #define v4sf_UP T_V4SF
20309 #define v2di_UP T_V2DI
20310 #define ti_UP T_TI
20311 #define ei_UP T_EI
20312 #define oi_UP T_OI
20314 #define UP(X) X##_UP
20316 typedef enum {
20317 NEON_BINOP,
20318 NEON_TERNOP,
20319 NEON_UNOP,
20320 NEON_GETLANE,
20321 NEON_SETLANE,
20322 NEON_CREATE,
20323 NEON_RINT,
20324 NEON_DUP,
20325 NEON_DUPLANE,
20326 NEON_COMBINE,
20327 NEON_SPLIT,
20328 NEON_LANEMUL,
20329 NEON_LANEMULL,
20330 NEON_LANEMULH,
20331 NEON_LANEMAC,
20332 NEON_SCALARMUL,
20333 NEON_SCALARMULL,
20334 NEON_SCALARMULH,
20335 NEON_SCALARMAC,
20336 NEON_CONVERT,
20337 NEON_FLOAT_WIDEN,
20338 NEON_FLOAT_NARROW,
20339 NEON_FIXCONV,
20340 NEON_SELECT,
20341 NEON_RESULTPAIR,
20342 NEON_REINTERP,
20343 NEON_VTBL,
20344 NEON_VTBX,
20345 NEON_LOAD1,
20346 NEON_LOAD1LANE,
20347 NEON_STORE1,
20348 NEON_STORE1LANE,
20349 NEON_LOADSTRUCT,
20350 NEON_LOADSTRUCTLANE,
20351 NEON_STORESTRUCT,
20352 NEON_STORESTRUCTLANE,
20353 NEON_LOGICBINOP,
20354 NEON_SHIFTINSERT,
20355 NEON_SHIFTIMM,
20356 NEON_SHIFTACC
20357 } neon_itype;
20359 typedef struct {
20360 const char *name;
20361 const neon_itype itype;
20362 const neon_builtin_type_mode mode;
20363 const enum insn_code code;
20364 unsigned int fcode;
20365 } neon_builtin_datum;
20367 #define CF(N,X) CODE_FOR_neon_##N##X
20369 #define VAR1(T, N, A) \
20370 {#N, NEON_##T, UP (A), CF (N, A), 0}
20371 #define VAR2(T, N, A, B) \
20372 VAR1 (T, N, A), \
20373 {#N, NEON_##T, UP (B), CF (N, B), 0}
20374 #define VAR3(T, N, A, B, C) \
20375 VAR2 (T, N, A, B), \
20376 {#N, NEON_##T, UP (C), CF (N, C), 0}
20377 #define VAR4(T, N, A, B, C, D) \
20378 VAR3 (T, N, A, B, C), \
20379 {#N, NEON_##T, UP (D), CF (N, D), 0}
20380 #define VAR5(T, N, A, B, C, D, E) \
20381 VAR4 (T, N, A, B, C, D), \
20382 {#N, NEON_##T, UP (E), CF (N, E), 0}
20383 #define VAR6(T, N, A, B, C, D, E, F) \
20384 VAR5 (T, N, A, B, C, D, E), \
20385 {#N, NEON_##T, UP (F), CF (N, F), 0}
20386 #define VAR7(T, N, A, B, C, D, E, F, G) \
20387 VAR6 (T, N, A, B, C, D, E, F), \
20388 {#N, NEON_##T, UP (G), CF (N, G), 0}
20389 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
20390 VAR7 (T, N, A, B, C, D, E, F, G), \
20391 {#N, NEON_##T, UP (H), CF (N, H), 0}
20392 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
20393 VAR8 (T, N, A, B, C, D, E, F, G, H), \
20394 {#N, NEON_##T, UP (I), CF (N, I), 0}
20395 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
20396 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
20397 {#N, NEON_##T, UP (J), CF (N, J), 0}
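/* As an example of the expansion (using a made-up entry, not necessarily one
   present in arm_neon_builtins.def):

       VAR2 (BINOP, vfoo, v8qi, v16qi)

   becomes the two table rows

       {"vfoo", NEON_BINOP, T_V8QI,  CODE_FOR_neon_vfoov8qi,  0},
       {"vfoo", NEON_BINOP, T_V16QI, CODE_FOR_neon_vfoov16qi, 0}

   with the trailing fcode field filled in later by arm_init_neon_builtins.  */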
20399 /* The NEON builtin data can be found in arm_neon_builtins.def.
20400 The mode entries in the following table correspond to the "key" type of the
20401 instruction variant, i.e. equivalent to that which would be specified after
20402 the assembler mnemonic, which usually refers to the last vector operand.
20403 (Signed/unsigned/polynomial types are not differentiated between though, and
20404 are all mapped onto the same mode for a given element size.) The modes
20405 listed per instruction should be the same as those defined for that
20406 instruction's pattern in neon.md. */
20408 static neon_builtin_datum neon_builtin_data[] =
20410 #include "arm_neon_builtins.def"
20413 #undef CF
20414 #undef VAR1
20415 #undef VAR2
20416 #undef VAR3
20417 #undef VAR4
20418 #undef VAR5
20419 #undef VAR6
20420 #undef VAR7
20421 #undef VAR8
20422 #undef VAR9
20423 #undef VAR10
20425 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
20426 #define VAR1(T, N, A) \
20427 CF (N, A)
20428 #define VAR2(T, N, A, B) \
20429 VAR1 (T, N, A), \
20430 CF (N, B)
20431 #define VAR3(T, N, A, B, C) \
20432 VAR2 (T, N, A, B), \
20433 CF (N, C)
20434 #define VAR4(T, N, A, B, C, D) \
20435 VAR3 (T, N, A, B, C), \
20436 CF (N, D)
20437 #define VAR5(T, N, A, B, C, D, E) \
20438 VAR4 (T, N, A, B, C, D), \
20439 CF (N, E)
20440 #define VAR6(T, N, A, B, C, D, E, F) \
20441 VAR5 (T, N, A, B, C, D, E), \
20442 CF (N, F)
20443 #define VAR7(T, N, A, B, C, D, E, F, G) \
20444 VAR6 (T, N, A, B, C, D, E, F), \
20445 CF (N, G)
20446 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
20447 VAR7 (T, N, A, B, C, D, E, F, G), \
20448 CF (N, H)
20449 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
20450 VAR8 (T, N, A, B, C, D, E, F, G, H), \
20451 CF (N, I)
20452 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
20453 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
20454 CF (N, J)
20455 enum arm_builtins
20457 ARM_BUILTIN_GETWCGR0,
20458 ARM_BUILTIN_GETWCGR1,
20459 ARM_BUILTIN_GETWCGR2,
20460 ARM_BUILTIN_GETWCGR3,
20462 ARM_BUILTIN_SETWCGR0,
20463 ARM_BUILTIN_SETWCGR1,
20464 ARM_BUILTIN_SETWCGR2,
20465 ARM_BUILTIN_SETWCGR3,
20467 ARM_BUILTIN_WZERO,
20469 ARM_BUILTIN_WAVG2BR,
20470 ARM_BUILTIN_WAVG2HR,
20471 ARM_BUILTIN_WAVG2B,
20472 ARM_BUILTIN_WAVG2H,
20474 ARM_BUILTIN_WACCB,
20475 ARM_BUILTIN_WACCH,
20476 ARM_BUILTIN_WACCW,
20478 ARM_BUILTIN_WMACS,
20479 ARM_BUILTIN_WMACSZ,
20480 ARM_BUILTIN_WMACU,
20481 ARM_BUILTIN_WMACUZ,
20483 ARM_BUILTIN_WSADB,
20484 ARM_BUILTIN_WSADBZ,
20485 ARM_BUILTIN_WSADH,
20486 ARM_BUILTIN_WSADHZ,
20488 ARM_BUILTIN_WALIGNI,
20489 ARM_BUILTIN_WALIGNR0,
20490 ARM_BUILTIN_WALIGNR1,
20491 ARM_BUILTIN_WALIGNR2,
20492 ARM_BUILTIN_WALIGNR3,
20494 ARM_BUILTIN_TMIA,
20495 ARM_BUILTIN_TMIAPH,
20496 ARM_BUILTIN_TMIABB,
20497 ARM_BUILTIN_TMIABT,
20498 ARM_BUILTIN_TMIATB,
20499 ARM_BUILTIN_TMIATT,
20501 ARM_BUILTIN_TMOVMSKB,
20502 ARM_BUILTIN_TMOVMSKH,
20503 ARM_BUILTIN_TMOVMSKW,
20505 ARM_BUILTIN_TBCSTB,
20506 ARM_BUILTIN_TBCSTH,
20507 ARM_BUILTIN_TBCSTW,
20509 ARM_BUILTIN_WMADDS,
20510 ARM_BUILTIN_WMADDU,
20512 ARM_BUILTIN_WPACKHSS,
20513 ARM_BUILTIN_WPACKWSS,
20514 ARM_BUILTIN_WPACKDSS,
20515 ARM_BUILTIN_WPACKHUS,
20516 ARM_BUILTIN_WPACKWUS,
20517 ARM_BUILTIN_WPACKDUS,
20519 ARM_BUILTIN_WADDB,
20520 ARM_BUILTIN_WADDH,
20521 ARM_BUILTIN_WADDW,
20522 ARM_BUILTIN_WADDSSB,
20523 ARM_BUILTIN_WADDSSH,
20524 ARM_BUILTIN_WADDSSW,
20525 ARM_BUILTIN_WADDUSB,
20526 ARM_BUILTIN_WADDUSH,
20527 ARM_BUILTIN_WADDUSW,
20528 ARM_BUILTIN_WSUBB,
20529 ARM_BUILTIN_WSUBH,
20530 ARM_BUILTIN_WSUBW,
20531 ARM_BUILTIN_WSUBSSB,
20532 ARM_BUILTIN_WSUBSSH,
20533 ARM_BUILTIN_WSUBSSW,
20534 ARM_BUILTIN_WSUBUSB,
20535 ARM_BUILTIN_WSUBUSH,
20536 ARM_BUILTIN_WSUBUSW,
20538 ARM_BUILTIN_WAND,
20539 ARM_BUILTIN_WANDN,
20540 ARM_BUILTIN_WOR,
20541 ARM_BUILTIN_WXOR,
20543 ARM_BUILTIN_WCMPEQB,
20544 ARM_BUILTIN_WCMPEQH,
20545 ARM_BUILTIN_WCMPEQW,
20546 ARM_BUILTIN_WCMPGTUB,
20547 ARM_BUILTIN_WCMPGTUH,
20548 ARM_BUILTIN_WCMPGTUW,
20549 ARM_BUILTIN_WCMPGTSB,
20550 ARM_BUILTIN_WCMPGTSH,
20551 ARM_BUILTIN_WCMPGTSW,
20553 ARM_BUILTIN_TEXTRMSB,
20554 ARM_BUILTIN_TEXTRMSH,
20555 ARM_BUILTIN_TEXTRMSW,
20556 ARM_BUILTIN_TEXTRMUB,
20557 ARM_BUILTIN_TEXTRMUH,
20558 ARM_BUILTIN_TEXTRMUW,
20559 ARM_BUILTIN_TINSRB,
20560 ARM_BUILTIN_TINSRH,
20561 ARM_BUILTIN_TINSRW,
20563 ARM_BUILTIN_WMAXSW,
20564 ARM_BUILTIN_WMAXSH,
20565 ARM_BUILTIN_WMAXSB,
20566 ARM_BUILTIN_WMAXUW,
20567 ARM_BUILTIN_WMAXUH,
20568 ARM_BUILTIN_WMAXUB,
20569 ARM_BUILTIN_WMINSW,
20570 ARM_BUILTIN_WMINSH,
20571 ARM_BUILTIN_WMINSB,
20572 ARM_BUILTIN_WMINUW,
20573 ARM_BUILTIN_WMINUH,
20574 ARM_BUILTIN_WMINUB,
20576 ARM_BUILTIN_WMULUM,
20577 ARM_BUILTIN_WMULSM,
20578 ARM_BUILTIN_WMULUL,
20580 ARM_BUILTIN_PSADBH,
20581 ARM_BUILTIN_WSHUFH,
20583 ARM_BUILTIN_WSLLH,
20584 ARM_BUILTIN_WSLLW,
20585 ARM_BUILTIN_WSLLD,
20586 ARM_BUILTIN_WSRAH,
20587 ARM_BUILTIN_WSRAW,
20588 ARM_BUILTIN_WSRAD,
20589 ARM_BUILTIN_WSRLH,
20590 ARM_BUILTIN_WSRLW,
20591 ARM_BUILTIN_WSRLD,
20592 ARM_BUILTIN_WRORH,
20593 ARM_BUILTIN_WRORW,
20594 ARM_BUILTIN_WRORD,
20595 ARM_BUILTIN_WSLLHI,
20596 ARM_BUILTIN_WSLLWI,
20597 ARM_BUILTIN_WSLLDI,
20598 ARM_BUILTIN_WSRAHI,
20599 ARM_BUILTIN_WSRAWI,
20600 ARM_BUILTIN_WSRADI,
20601 ARM_BUILTIN_WSRLHI,
20602 ARM_BUILTIN_WSRLWI,
20603 ARM_BUILTIN_WSRLDI,
20604 ARM_BUILTIN_WRORHI,
20605 ARM_BUILTIN_WRORWI,
20606 ARM_BUILTIN_WRORDI,
20608 ARM_BUILTIN_WUNPCKIHB,
20609 ARM_BUILTIN_WUNPCKIHH,
20610 ARM_BUILTIN_WUNPCKIHW,
20611 ARM_BUILTIN_WUNPCKILB,
20612 ARM_BUILTIN_WUNPCKILH,
20613 ARM_BUILTIN_WUNPCKILW,
20615 ARM_BUILTIN_WUNPCKEHSB,
20616 ARM_BUILTIN_WUNPCKEHSH,
20617 ARM_BUILTIN_WUNPCKEHSW,
20618 ARM_BUILTIN_WUNPCKEHUB,
20619 ARM_BUILTIN_WUNPCKEHUH,
20620 ARM_BUILTIN_WUNPCKEHUW,
20621 ARM_BUILTIN_WUNPCKELSB,
20622 ARM_BUILTIN_WUNPCKELSH,
20623 ARM_BUILTIN_WUNPCKELSW,
20624 ARM_BUILTIN_WUNPCKELUB,
20625 ARM_BUILTIN_WUNPCKELUH,
20626 ARM_BUILTIN_WUNPCKELUW,
20628 ARM_BUILTIN_WABSB,
20629 ARM_BUILTIN_WABSH,
20630 ARM_BUILTIN_WABSW,
20632 ARM_BUILTIN_WADDSUBHX,
20633 ARM_BUILTIN_WSUBADDHX,
20635 ARM_BUILTIN_WABSDIFFB,
20636 ARM_BUILTIN_WABSDIFFH,
20637 ARM_BUILTIN_WABSDIFFW,
20639 ARM_BUILTIN_WADDCH,
20640 ARM_BUILTIN_WADDCW,
20642 ARM_BUILTIN_WAVG4,
20643 ARM_BUILTIN_WAVG4R,
20645 ARM_BUILTIN_WMADDSX,
20646 ARM_BUILTIN_WMADDUX,
20648 ARM_BUILTIN_WMADDSN,
20649 ARM_BUILTIN_WMADDUN,
20651 ARM_BUILTIN_WMULWSM,
20652 ARM_BUILTIN_WMULWUM,
20654 ARM_BUILTIN_WMULWSMR,
20655 ARM_BUILTIN_WMULWUMR,
20657 ARM_BUILTIN_WMULWL,
20659 ARM_BUILTIN_WMULSMR,
20660 ARM_BUILTIN_WMULUMR,
20662 ARM_BUILTIN_WQMULM,
20663 ARM_BUILTIN_WQMULMR,
20665 ARM_BUILTIN_WQMULWM,
20666 ARM_BUILTIN_WQMULWMR,
20668 ARM_BUILTIN_WADDBHUSM,
20669 ARM_BUILTIN_WADDBHUSL,
20671 ARM_BUILTIN_WQMIABB,
20672 ARM_BUILTIN_WQMIABT,
20673 ARM_BUILTIN_WQMIATB,
20674 ARM_BUILTIN_WQMIATT,
20676 ARM_BUILTIN_WQMIABBN,
20677 ARM_BUILTIN_WQMIABTN,
20678 ARM_BUILTIN_WQMIATBN,
20679 ARM_BUILTIN_WQMIATTN,
20681 ARM_BUILTIN_WMIABB,
20682 ARM_BUILTIN_WMIABT,
20683 ARM_BUILTIN_WMIATB,
20684 ARM_BUILTIN_WMIATT,
20686 ARM_BUILTIN_WMIABBN,
20687 ARM_BUILTIN_WMIABTN,
20688 ARM_BUILTIN_WMIATBN,
20689 ARM_BUILTIN_WMIATTN,
20691 ARM_BUILTIN_WMIAWBB,
20692 ARM_BUILTIN_WMIAWBT,
20693 ARM_BUILTIN_WMIAWTB,
20694 ARM_BUILTIN_WMIAWTT,
20696 ARM_BUILTIN_WMIAWBBN,
20697 ARM_BUILTIN_WMIAWBTN,
20698 ARM_BUILTIN_WMIAWTBN,
20699 ARM_BUILTIN_WMIAWTTN,
20701 ARM_BUILTIN_WMERGE,
20703 ARM_BUILTIN_CRC32B,
20704 ARM_BUILTIN_CRC32H,
20705 ARM_BUILTIN_CRC32W,
20706 ARM_BUILTIN_CRC32CB,
20707 ARM_BUILTIN_CRC32CH,
20708 ARM_BUILTIN_CRC32CW,
20710 #undef CRYPTO1
20711 #undef CRYPTO2
20712 #undef CRYPTO3
20714 #define CRYPTO1(L, U, M1, M2) \
20715 ARM_BUILTIN_CRYPTO_##U,
20716 #define CRYPTO2(L, U, M1, M2, M3) \
20717 ARM_BUILTIN_CRYPTO_##U,
20718 #define CRYPTO3(L, U, M1, M2, M3, M4) \
20719 ARM_BUILTIN_CRYPTO_##U,
20721 #include "crypto.def"
20723 #undef CRYPTO1
20724 #undef CRYPTO2
20725 #undef CRYPTO3
20727 #include "arm_neon_builtins.def"
20729 ,ARM_BUILTIN_MAX
20732 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
20734 #undef CF
20735 #undef VAR1
20736 #undef VAR2
20737 #undef VAR3
20738 #undef VAR4
20739 #undef VAR5
20740 #undef VAR6
20741 #undef VAR7
20742 #undef VAR8
20743 #undef VAR9
20744 #undef VAR10
20746 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
20748 #define NUM_DREG_TYPES 5
20749 #define NUM_QREG_TYPES 6
20751 static void
20752 arm_init_neon_builtins (void)
20754 unsigned int i, fcode;
20755 tree decl;
20757 tree neon_intQI_type_node;
20758 tree neon_intHI_type_node;
20759 tree neon_floatHF_type_node;
20760 tree neon_polyQI_type_node;
20761 tree neon_polyHI_type_node;
20762 tree neon_intSI_type_node;
20763 tree neon_intDI_type_node;
20764 tree neon_intUTI_type_node;
20765 tree neon_float_type_node;
20767 tree intQI_pointer_node;
20768 tree intHI_pointer_node;
20769 tree intSI_pointer_node;
20770 tree intDI_pointer_node;
20771 tree float_pointer_node;
20773 tree const_intQI_node;
20774 tree const_intHI_node;
20775 tree const_intSI_node;
20776 tree const_intDI_node;
20777 tree const_float_node;
20779 tree const_intQI_pointer_node;
20780 tree const_intHI_pointer_node;
20781 tree const_intSI_pointer_node;
20782 tree const_intDI_pointer_node;
20783 tree const_float_pointer_node;
20785 tree V8QI_type_node;
20786 tree V4HI_type_node;
20787 tree V4HF_type_node;
20788 tree V2SI_type_node;
20789 tree V2SF_type_node;
20790 tree V16QI_type_node;
20791 tree V8HI_type_node;
20792 tree V4SI_type_node;
20793 tree V4SF_type_node;
20794 tree V2DI_type_node;
20796 tree intUQI_type_node;
20797 tree intUHI_type_node;
20798 tree intUSI_type_node;
20799 tree intUDI_type_node;
20801 tree intEI_type_node;
20802 tree intOI_type_node;
20803 tree intCI_type_node;
20804 tree intXI_type_node;
20806 tree V8QI_pointer_node;
20807 tree V4HI_pointer_node;
20808 tree V2SI_pointer_node;
20809 tree V2SF_pointer_node;
20810 tree V16QI_pointer_node;
20811 tree V8HI_pointer_node;
20812 tree V4SI_pointer_node;
20813 tree V4SF_pointer_node;
20814 tree V2DI_pointer_node;
20816 tree void_ftype_pv8qi_v8qi_v8qi;
20817 tree void_ftype_pv4hi_v4hi_v4hi;
20818 tree void_ftype_pv2si_v2si_v2si;
20819 tree void_ftype_pv2sf_v2sf_v2sf;
20820 tree void_ftype_pdi_di_di;
20821 tree void_ftype_pv16qi_v16qi_v16qi;
20822 tree void_ftype_pv8hi_v8hi_v8hi;
20823 tree void_ftype_pv4si_v4si_v4si;
20824 tree void_ftype_pv4sf_v4sf_v4sf;
20825 tree void_ftype_pv2di_v2di_v2di;
20827 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
20828 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
20829 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
20831 /* Create distinguished type nodes for NEON vector element types,
20832 and pointers to values of such types, so we can detect them later. */
20833 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
20834 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
20835 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
20836 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
20837 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
20838 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
20839 neon_float_type_node = make_node (REAL_TYPE);
20840 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
20841 layout_type (neon_float_type_node);
20842 neon_floatHF_type_node = make_node (REAL_TYPE);
20843 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
20844 layout_type (neon_floatHF_type_node);
20846 /* Define typedefs which exactly correspond to the modes we are basing vector
20847 types on. If you change these names you'll need to change
20848 the table used by arm_mangle_type too. */
20849 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
20850 "__builtin_neon_qi");
20851 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
20852 "__builtin_neon_hi");
20853 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
20854 "__builtin_neon_hf");
20855 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
20856 "__builtin_neon_si");
20857 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
20858 "__builtin_neon_sf");
20859 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
20860 "__builtin_neon_di");
20861 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
20862 "__builtin_neon_poly8");
20863 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
20864 "__builtin_neon_poly16");
20866 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
20867 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
20868 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
20869 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
20870 float_pointer_node = build_pointer_type (neon_float_type_node);
20872 /* Next create constant-qualified versions of the above types. */
20873 const_intQI_node = build_qualified_type (neon_intQI_type_node,
20874 TYPE_QUAL_CONST);
20875 const_intHI_node = build_qualified_type (neon_intHI_type_node,
20876 TYPE_QUAL_CONST);
20877 const_intSI_node = build_qualified_type (neon_intSI_type_node,
20878 TYPE_QUAL_CONST);
20879 const_intDI_node = build_qualified_type (neon_intDI_type_node,
20880 TYPE_QUAL_CONST);
20881 const_float_node = build_qualified_type (neon_float_type_node,
20882 TYPE_QUAL_CONST);
20884 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
20885 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
20886 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
20887 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
20888 const_float_pointer_node = build_pointer_type (const_float_node);
20890 /* Now create vector types based on our NEON element types. */
20891 /* 64-bit vectors. */
20892 V8QI_type_node =
20893 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
20894 V4HI_type_node =
20895 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
20896 V4HF_type_node =
20897 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
20898 V2SI_type_node =
20899 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
20900 V2SF_type_node =
20901 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
20902 /* 128-bit vectors. */
20903 V16QI_type_node =
20904 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
20905 V8HI_type_node =
20906 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
20907 V4SI_type_node =
20908 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
20909 V4SF_type_node =
20910 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
20911 V2DI_type_node =
20912 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
20914 /* Unsigned integer types for various mode sizes. */
20915 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
20916 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
20917 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
20918 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
20919 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
20922 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
20923 "__builtin_neon_uqi");
20924 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
20925 "__builtin_neon_uhi");
20926 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
20927 "__builtin_neon_usi");
20928 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
20929 "__builtin_neon_udi");
20930 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
20931 "__builtin_neon_poly64");
20932 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
20933 "__builtin_neon_poly128");
20935 /* Opaque integer types for structures of vectors. */
20936 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
20937 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
20938 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
20939 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
20941 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
20942 "__builtin_neon_ti");
20943 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
20944 "__builtin_neon_ei");
20945 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
20946 "__builtin_neon_oi");
20947 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
20948 "__builtin_neon_ci");
20949 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
20950 "__builtin_neon_xi");
20952 /* Pointers to vector types. */
20953 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
20954 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
20955 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
20956 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
20957 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
20958 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
20959 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
20960 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
20961 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
20963 /* Operations which return results as pairs. */
20964 void_ftype_pv8qi_v8qi_v8qi =
20965 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
20966 V8QI_type_node, NULL);
20967 void_ftype_pv4hi_v4hi_v4hi =
20968 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
20969 V4HI_type_node, NULL);
20970 void_ftype_pv2si_v2si_v2si =
20971 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
20972 V2SI_type_node, NULL);
20973 void_ftype_pv2sf_v2sf_v2sf =
20974 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
20975 V2SF_type_node, NULL);
20976 void_ftype_pdi_di_di =
20977 build_function_type_list (void_type_node, intDI_pointer_node,
20978 neon_intDI_type_node, neon_intDI_type_node, NULL);
20979 void_ftype_pv16qi_v16qi_v16qi =
20980 build_function_type_list (void_type_node, V16QI_pointer_node,
20981 V16QI_type_node, V16QI_type_node, NULL);
20982 void_ftype_pv8hi_v8hi_v8hi =
20983 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
20984 V8HI_type_node, NULL);
20985 void_ftype_pv4si_v4si_v4si =
20986 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
20987 V4SI_type_node, NULL);
20988 void_ftype_pv4sf_v4sf_v4sf =
20989 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
20990 V4SF_type_node, NULL);
20991 void_ftype_pv2di_v2di_v2di =
20992 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
20993 V2DI_type_node, NULL);
20995 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
20997 tree V4USI_type_node =
20998 build_vector_type_for_mode (intUSI_type_node, V4SImode);
21000 tree V16UQI_type_node =
21001 build_vector_type_for_mode (intUQI_type_node, V16QImode);
21003 tree v16uqi_ftype_v16uqi
21004 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
21006 tree v16uqi_ftype_v16uqi_v16uqi
21007 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
21008 V16UQI_type_node, NULL_TREE);
21010 tree v4usi_ftype_v4usi
21011 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
21013 tree v4usi_ftype_v4usi_v4usi
21014 = build_function_type_list (V4USI_type_node, V4USI_type_node,
21015 V4USI_type_node, NULL_TREE);
21017 tree v4usi_ftype_v4usi_v4usi_v4usi
21018 = build_function_type_list (V4USI_type_node, V4USI_type_node,
21019 V4USI_type_node, V4USI_type_node, NULL_TREE);
21021 tree uti_ftype_udi_udi
21022 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
21023 intUDI_type_node, NULL_TREE);
21025 #undef CRYPTO1
21026 #undef CRYPTO2
21027 #undef CRYPTO3
21028 #undef C
21029 #undef N
21030 #undef CF
21031 #undef FT1
21032 #undef FT2
21033 #undef FT3
21035 #define C(U) \
21036 ARM_BUILTIN_CRYPTO_##U
21037 #define N(L) \
21038 "__builtin_arm_crypto_"#L
21039 #define FT1(R, A) \
21040 R##_ftype_##A
21041 #define FT2(R, A1, A2) \
21042 R##_ftype_##A1##_##A2
21043 #define FT3(R, A1, A2, A3) \
21044 R##_ftype_##A1##_##A2##_##A3
21045 #define CRYPTO1(L, U, R, A) \
21046 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
21047 C (U), BUILT_IN_MD, \
21048 NULL, NULL_TREE);
21049 #define CRYPTO2(L, U, R, A1, A2) \
21050 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
21051 C (U), BUILT_IN_MD, \
21052 NULL, NULL_TREE);
21054 #define CRYPTO3(L, U, R, A1, A2, A3) \
21055 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
21056 C (U), BUILT_IN_MD, \
21057 NULL, NULL_TREE);
21058 #include "crypto.def"
21060 #undef CRYPTO1
21061 #undef CRYPTO2
21062 #undef CRYPTO3
21063 #undef C
21064 #undef N
21065 #undef FT1
21066 #undef FT2
21067 #undef FT3
21069 dreg_types[0] = V8QI_type_node;
21070 dreg_types[1] = V4HI_type_node;
21071 dreg_types[2] = V2SI_type_node;
21072 dreg_types[3] = V2SF_type_node;
21073 dreg_types[4] = neon_intDI_type_node;
21075 qreg_types[0] = V16QI_type_node;
21076 qreg_types[1] = V8HI_type_node;
21077 qreg_types[2] = V4SI_type_node;
21078 qreg_types[3] = V4SF_type_node;
21079 qreg_types[4] = V2DI_type_node;
21080 qreg_types[5] = neon_intUTI_type_node;
21082 for (i = 0; i < NUM_QREG_TYPES; i++)
21084 int j;
21085 for (j = 0; j < NUM_QREG_TYPES; j++)
21087 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
21088 reinterp_ftype_dreg[i][j]
21089 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
21091 reinterp_ftype_qreg[i][j]
21092 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
21096 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
21097 i < ARRAY_SIZE (neon_builtin_data);
21098 i++, fcode++)
21100 neon_builtin_datum *d = &neon_builtin_data[i];
21102 const char* const modenames[] = {
21103 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
21104 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
21105 "ti", "ei", "oi"
21107 char namebuf[60];
21108 tree ftype = NULL;
21109 int is_load = 0, is_store = 0;
21111 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
21113 d->fcode = fcode;
21115 switch (d->itype)
21117 case NEON_LOAD1:
21118 case NEON_LOAD1LANE:
21119 case NEON_LOADSTRUCT:
21120 case NEON_LOADSTRUCTLANE:
21121 is_load = 1;
21122 /* Fall through. */
21123 case NEON_STORE1:
21124 case NEON_STORE1LANE:
21125 case NEON_STORESTRUCT:
21126 case NEON_STORESTRUCTLANE:
21127 if (!is_load)
21128 is_store = 1;
21129 /* Fall through. */
21130 case NEON_UNOP:
21131 case NEON_RINT:
21132 case NEON_BINOP:
21133 case NEON_LOGICBINOP:
21134 case NEON_SHIFTINSERT:
21135 case NEON_TERNOP:
21136 case NEON_GETLANE:
21137 case NEON_SETLANE:
21138 case NEON_CREATE:
21139 case NEON_DUP:
21140 case NEON_DUPLANE:
21141 case NEON_SHIFTIMM:
21142 case NEON_SHIFTACC:
21143 case NEON_COMBINE:
21144 case NEON_SPLIT:
21145 case NEON_CONVERT:
21146 case NEON_FIXCONV:
21147 case NEON_LANEMUL:
21148 case NEON_LANEMULL:
21149 case NEON_LANEMULH:
21150 case NEON_LANEMAC:
21151 case NEON_SCALARMUL:
21152 case NEON_SCALARMULL:
21153 case NEON_SCALARMULH:
21154 case NEON_SCALARMAC:
21155 case NEON_SELECT:
21156 case NEON_VTBL:
21157 case NEON_VTBX:
21159 int k;
21160 tree return_type = void_type_node, args = void_list_node;
21162 /* Build a function type directly from the insn_data for
21163 this builtin. The build_function_type() function takes
21164 care of removing duplicates for us. */
21165 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
21167 tree eltype;
21169 if (is_load && k == 1)
21171 /* Neon load patterns always have the memory
21172 operand in the operand 1 position. */
21173 gcc_assert (insn_data[d->code].operand[k].predicate
21174 == neon_struct_operand);
21176 switch (d->mode)
21178 case T_V8QI:
21179 case T_V16QI:
21180 eltype = const_intQI_pointer_node;
21181 break;
21183 case T_V4HI:
21184 case T_V8HI:
21185 eltype = const_intHI_pointer_node;
21186 break;
21188 case T_V2SI:
21189 case T_V4SI:
21190 eltype = const_intSI_pointer_node;
21191 break;
21193 case T_V2SF:
21194 case T_V4SF:
21195 eltype = const_float_pointer_node;
21196 break;
21198 case T_DI:
21199 case T_V2DI:
21200 eltype = const_intDI_pointer_node;
21201 break;
21203 default: gcc_unreachable ();
21206 else if (is_store && k == 0)
21208 /* Similarly, Neon store patterns use operand 0 as
21209 the memory location to store to. */
21210 gcc_assert (insn_data[d->code].operand[k].predicate
21211 == neon_struct_operand);
21213 switch (d->mode)
21215 case T_V8QI:
21216 case T_V16QI:
21217 eltype = intQI_pointer_node;
21218 break;
21220 case T_V4HI:
21221 case T_V8HI:
21222 eltype = intHI_pointer_node;
21223 break;
21225 case T_V2SI:
21226 case T_V4SI:
21227 eltype = intSI_pointer_node;
21228 break;
21230 case T_V2SF:
21231 case T_V4SF:
21232 eltype = float_pointer_node;
21233 break;
21235 case T_DI:
21236 case T_V2DI:
21237 eltype = intDI_pointer_node;
21238 break;
21240 default: gcc_unreachable ();
21243 else
21245 switch (insn_data[d->code].operand[k].mode)
21247 case VOIDmode: eltype = void_type_node; break;
21248 /* Scalars. */
21249 case QImode: eltype = neon_intQI_type_node; break;
21250 case HImode: eltype = neon_intHI_type_node; break;
21251 case SImode: eltype = neon_intSI_type_node; break;
21252 case SFmode: eltype = neon_float_type_node; break;
21253 case DImode: eltype = neon_intDI_type_node; break;
21254 case TImode: eltype = intTI_type_node; break;
21255 case EImode: eltype = intEI_type_node; break;
21256 case OImode: eltype = intOI_type_node; break;
21257 case CImode: eltype = intCI_type_node; break;
21258 case XImode: eltype = intXI_type_node; break;
21259 /* 64-bit vectors. */
21260 case V8QImode: eltype = V8QI_type_node; break;
21261 case V4HImode: eltype = V4HI_type_node; break;
21262 case V2SImode: eltype = V2SI_type_node; break;
21263 case V2SFmode: eltype = V2SF_type_node; break;
21264 /* 128-bit vectors. */
21265 case V16QImode: eltype = V16QI_type_node; break;
21266 case V8HImode: eltype = V8HI_type_node; break;
21267 case V4SImode: eltype = V4SI_type_node; break;
21268 case V4SFmode: eltype = V4SF_type_node; break;
21269 case V2DImode: eltype = V2DI_type_node; break;
21270 default: gcc_unreachable ();
21274 if (k == 0 && !is_store)
21275 return_type = eltype;
21276 else
21277 args = tree_cons (NULL_TREE, eltype, args);
21280 ftype = build_function_type (return_type, args);
21282 break;
21284 case NEON_RESULTPAIR:
21286 switch (insn_data[d->code].operand[1].mode)
21288 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
21289 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
21290 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
21291 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
21292 case DImode: ftype = void_ftype_pdi_di_di; break;
21293 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
21294 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
21295 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
21296 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
21297 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
21298 default: gcc_unreachable ();
21301 break;
21303 case NEON_REINTERP:
21305 /* We iterate over NUM_DREG_TYPES doubleword types,
21306 then NUM_QREG_TYPES quadword types.
21307 V4HF is not a type used in reinterpret, so we translate
21308 d->mode to the correct index in reinterp_ftype_dreg. */
21309 bool qreg_p
21310 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
21311 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
21312 % NUM_QREG_TYPES;
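/* Worked example: for a doubleword variant with d->mode == T_V2SF (enum
   value 4), T_V4HF is skipped, so rhs = (4 - 1) % 6 = 3 and the argument
   type is dreg_types[3], i.e. V2SF; for a quadword variant with
   d->mode == T_V16QI (value 6), rhs = 6 % 6 = 0, i.e. a V16QI argument.  */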
21313 switch (insn_data[d->code].operand[0].mode)
21315 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
21316 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
21317 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
21318 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
21319 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
21320 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
21321 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
21322 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
21323 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
21324 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
21325 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
21326 default: gcc_unreachable ();
21329 break;
21330 case NEON_FLOAT_WIDEN:
21332 tree eltype = NULL_TREE;
21333 tree return_type = NULL_TREE;
21335 switch (insn_data[d->code].operand[1].mode)
21337 case V4HFmode:
21338 eltype = V4HF_type_node;
21339 return_type = V4SF_type_node;
21340 break;
21341 default: gcc_unreachable ();
21343 ftype = build_function_type_list (return_type, eltype, NULL);
21344 break;
21346 case NEON_FLOAT_NARROW:
21348 tree eltype = NULL_TREE;
21349 tree return_type = NULL_TREE;
21351 switch (insn_data[d->code].operand[1].mode)
21353 case V4SFmode:
21354 eltype = V4SF_type_node;
21355 return_type = V4HF_type_node;
21356 break;
21357 default: gcc_unreachable ();
21359 ftype = build_function_type_list (return_type, eltype, NULL);
21360 break;
21362 default:
21363 gcc_unreachable ();
21366 gcc_assert (ftype != NULL);
21368 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
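/* For instance, an entry whose name field is "vadd" and whose mode is T_V8QI
   would be registered as __builtin_neon_vaddv8qi, the spelling that the
   arm_neon.h intrinsics ultimately refer to.  */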
21370 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
21371 NULL_TREE);
21372 arm_builtin_decls[fcode] = decl;
21376 #undef NUM_DREG_TYPES
21377 #undef NUM_QREG_TYPES
21379 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
21380 do \
21382 if ((MASK) & insn_flags) \
21384 tree bdecl; \
21385 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
21386 BUILT_IN_MD, NULL, NULL_TREE); \
21387 arm_builtin_decls[CODE] = bdecl; \
21390 while (0)
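/* Illustration (added for clarity; not in the original source): a later
   call such as

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);

   registers the builtin and records its decl in arm_builtin_decls only
   when the selected CPU sets FL_IWMMXT in insn_flags; otherwise the
   macro expands to a no-op.  */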
21392 struct builtin_description
21394 const unsigned int mask;
21395 const enum insn_code icode;
21396 const char * const name;
21397 const enum arm_builtins code;
21398 const enum rtx_code comparison;
21399 const unsigned int flag;
21402 static const struct builtin_description bdesc_2arg[] =
21404 #define IWMMXT_BUILTIN(code, string, builtin) \
21405 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
21406 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
21408 #define IWMMXT2_BUILTIN(code, string, builtin) \
21409 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
21410 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
21412 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
21413 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
21414 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
21415 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
21416 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
21417 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
21418 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
21419 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
21420 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
21421 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
21422 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
21423 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
21424 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
21425 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
21426 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
21427 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
21428 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
21429 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
21430 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
21431 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
21432 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
21433 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
21434 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
21435 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
21436 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
21437 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
21438 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
21439 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
21440 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
21441 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
21442 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
21443 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
21444 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
21445 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
21446 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
21447 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
21448 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
21449 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
21450 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
21451 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
21452 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
21453 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
21454 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
21455 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
21456 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
21457 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
21458 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
21459 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
21460 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
21461 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
21462 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
21463 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
21464 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
21465 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
21466 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
21467 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
21468 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
21469 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
21470 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
21471 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
21472 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
21473 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
21474 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
21475 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
21476 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
21477 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
21478 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
21479 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
21480 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
21481 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
21482 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
21483 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
21484 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
21485 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
21486 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
21487 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
21488 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
21489 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
21491 #define IWMMXT_BUILTIN2(code, builtin) \
21492 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
21494 #define IWMMXT2_BUILTIN2(code, builtin) \
21495 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
21497 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
21498 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
21499 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
21500 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
21501 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
21502 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
21503 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
21504 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
21505 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
21506 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
21508 #define CRC32_BUILTIN(L, U) \
21509 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
21510 UNKNOWN, 0},
21511 CRC32_BUILTIN (crc32b, CRC32B)
21512 CRC32_BUILTIN (crc32h, CRC32H)
21513 CRC32_BUILTIN (crc32w, CRC32W)
21514 CRC32_BUILTIN (crc32cb, CRC32CB)
21515 CRC32_BUILTIN (crc32ch, CRC32CH)
21516 CRC32_BUILTIN (crc32cw, CRC32CW)
21517 #undef CRC32_BUILTIN
21520 #define CRYPTO_BUILTIN(L, U) \
21521 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
21522 UNKNOWN, 0},
21523 #undef CRYPTO1
21524 #undef CRYPTO2
21525 #undef CRYPTO3
21526 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
21527 #define CRYPTO1(L, U, R, A)
21528 #define CRYPTO3(L, U, R, A1, A2, A3)
21529 #include "crypto.def"
21530 #undef CRYPTO1
21531 #undef CRYPTO2
21532 #undef CRYPTO3
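/* Note (added for clarity; not in the original source): only the
   two-operand entries of crypto.def are expanded into this table.
   CRYPTO2 maps each such entry onto CRYPTO_BUILTIN, which produces an
   initializer of the form

     {0, CODE_FOR_crypto_<name>, "__builtin_arm_crypto_<name>",
      ARM_BUILTIN_CRYPTO_<NAME>, UNKNOWN, 0},

   while CRYPTO1 and CRYPTO3 expand to nothing here, so one- and
   three-operand crypto builtins are collected in bdesc_1arg and
   bdesc_3arg below instead.  */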
21536 static const struct builtin_description bdesc_1arg[] =
21538 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
21539 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
21540 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
21541 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
21542 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
21543 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
21544 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
21545 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
21546 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
21547 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
21548 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
21549 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
21550 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
21551 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
21552 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
21553 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
21554 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
21555 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
21556 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
21557 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
21558 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
21559 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
21560 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
21561 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
21563 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
21564 #define CRYPTO2(L, U, R, A1, A2)
21565 #define CRYPTO3(L, U, R, A1, A2, A3)
21566 #include "crypto.def"
21567 #undef CRYPTO1
21568 #undef CRYPTO2
21569 #undef CRYPTO3
21572 static const struct builtin_description bdesc_3arg[] =
21574 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
21575 #define CRYPTO1(L, U, R, A)
21576 #define CRYPTO2(L, U, R, A1, A2)
21577 #include "crypto.def"
21578 #undef CRYPTO1
21579 #undef CRYPTO2
21580 #undef CRYPTO3
21582 #undef CRYPTO_BUILTIN
21584 /* Set up all the iWMMXt builtins. This is not called if
21585 TARGET_IWMMXT is zero. */
21587 static void
21588 arm_init_iwmmxt_builtins (void)
21590 const struct builtin_description * d;
21591 size_t i;
21593 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
21594 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
21595 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
21597 tree v8qi_ftype_v8qi_v8qi_int
21598 = build_function_type_list (V8QI_type_node,
21599 V8QI_type_node, V8QI_type_node,
21600 integer_type_node, NULL_TREE);
21601 tree v4hi_ftype_v4hi_int
21602 = build_function_type_list (V4HI_type_node,
21603 V4HI_type_node, integer_type_node, NULL_TREE);
21604 tree v2si_ftype_v2si_int
21605 = build_function_type_list (V2SI_type_node,
21606 V2SI_type_node, integer_type_node, NULL_TREE);
21607 tree v2si_ftype_di_di
21608 = build_function_type_list (V2SI_type_node,
21609 long_long_integer_type_node,
21610 long_long_integer_type_node,
21611 NULL_TREE);
21612 tree di_ftype_di_int
21613 = build_function_type_list (long_long_integer_type_node,
21614 long_long_integer_type_node,
21615 integer_type_node, NULL_TREE);
21616 tree di_ftype_di_int_int
21617 = build_function_type_list (long_long_integer_type_node,
21618 long_long_integer_type_node,
21619 integer_type_node,
21620 integer_type_node, NULL_TREE);
21621 tree int_ftype_v8qi
21622 = build_function_type_list (integer_type_node,
21623 V8QI_type_node, NULL_TREE);
21624 tree int_ftype_v4hi
21625 = build_function_type_list (integer_type_node,
21626 V4HI_type_node, NULL_TREE);
21627 tree int_ftype_v2si
21628 = build_function_type_list (integer_type_node,
21629 V2SI_type_node, NULL_TREE);
21630 tree int_ftype_v8qi_int
21631 = build_function_type_list (integer_type_node,
21632 V8QI_type_node, integer_type_node, NULL_TREE);
21633 tree int_ftype_v4hi_int
21634 = build_function_type_list (integer_type_node,
21635 V4HI_type_node, integer_type_node, NULL_TREE);
21636 tree int_ftype_v2si_int
21637 = build_function_type_list (integer_type_node,
21638 V2SI_type_node, integer_type_node, NULL_TREE);
21639 tree v8qi_ftype_v8qi_int_int
21640 = build_function_type_list (V8QI_type_node,
21641 V8QI_type_node, integer_type_node,
21642 integer_type_node, NULL_TREE);
21643 tree v4hi_ftype_v4hi_int_int
21644 = build_function_type_list (V4HI_type_node,
21645 V4HI_type_node, integer_type_node,
21646 integer_type_node, NULL_TREE);
21647 tree v2si_ftype_v2si_int_int
21648 = build_function_type_list (V2SI_type_node,
21649 V2SI_type_node, integer_type_node,
21650 integer_type_node, NULL_TREE);
21651 /* Miscellaneous. */
21652 tree v8qi_ftype_v4hi_v4hi
21653 = build_function_type_list (V8QI_type_node,
21654 V4HI_type_node, V4HI_type_node, NULL_TREE);
21655 tree v4hi_ftype_v2si_v2si
21656 = build_function_type_list (V4HI_type_node,
21657 V2SI_type_node, V2SI_type_node, NULL_TREE);
21658 tree v8qi_ftype_v4hi_v8qi
21659 = build_function_type_list (V8QI_type_node,
21660 V4HI_type_node, V8QI_type_node, NULL_TREE);
21661 tree v2si_ftype_v4hi_v4hi
21662 = build_function_type_list (V2SI_type_node,
21663 V4HI_type_node, V4HI_type_node, NULL_TREE);
21664 tree v2si_ftype_v8qi_v8qi
21665 = build_function_type_list (V2SI_type_node,
21666 V8QI_type_node, V8QI_type_node, NULL_TREE);
21667 tree v4hi_ftype_v4hi_di
21668 = build_function_type_list (V4HI_type_node,
21669 V4HI_type_node, long_long_integer_type_node,
21670 NULL_TREE);
21671 tree v2si_ftype_v2si_di
21672 = build_function_type_list (V2SI_type_node,
21673 V2SI_type_node, long_long_integer_type_node,
21674 NULL_TREE);
21675 tree di_ftype_void
21676 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
21677 tree int_ftype_void
21678 = build_function_type_list (integer_type_node, NULL_TREE);
21679 tree di_ftype_v8qi
21680 = build_function_type_list (long_long_integer_type_node,
21681 V8QI_type_node, NULL_TREE);
21682 tree di_ftype_v4hi
21683 = build_function_type_list (long_long_integer_type_node,
21684 V4HI_type_node, NULL_TREE);
21685 tree di_ftype_v2si
21686 = build_function_type_list (long_long_integer_type_node,
21687 V2SI_type_node, NULL_TREE);
21688 tree v2si_ftype_v4hi
21689 = build_function_type_list (V2SI_type_node,
21690 V4HI_type_node, NULL_TREE);
21691 tree v4hi_ftype_v8qi
21692 = build_function_type_list (V4HI_type_node,
21693 V8QI_type_node, NULL_TREE);
21694 tree v8qi_ftype_v8qi
21695 = build_function_type_list (V8QI_type_node,
21696 V8QI_type_node, NULL_TREE);
21697 tree v4hi_ftype_v4hi
21698 = build_function_type_list (V4HI_type_node,
21699 V4HI_type_node, NULL_TREE);
21700 tree v2si_ftype_v2si
21701 = build_function_type_list (V2SI_type_node,
21702 V2SI_type_node, NULL_TREE);
21704 tree di_ftype_di_v4hi_v4hi
21705 = build_function_type_list (long_long_unsigned_type_node,
21706 long_long_unsigned_type_node,
21707 V4HI_type_node, V4HI_type_node,
21708 NULL_TREE);
21710 tree di_ftype_v4hi_v4hi
21711 = build_function_type_list (long_long_unsigned_type_node,
21712 V4HI_type_node, V4HI_type_node,
21713 NULL_TREE);
21715 tree v2si_ftype_v2si_v4hi_v4hi
21716 = build_function_type_list (V2SI_type_node,
21717 V2SI_type_node, V4HI_type_node,
21718 V4HI_type_node, NULL_TREE);
21720 tree v2si_ftype_v2si_v8qi_v8qi
21721 = build_function_type_list (V2SI_type_node,
21722 V2SI_type_node, V8QI_type_node,
21723 V8QI_type_node, NULL_TREE);
21725 tree di_ftype_di_v2si_v2si
21726 = build_function_type_list (long_long_unsigned_type_node,
21727 long_long_unsigned_type_node,
21728 V2SI_type_node, V2SI_type_node,
21729 NULL_TREE);
21731 tree di_ftype_di_di_int
21732 = build_function_type_list (long_long_unsigned_type_node,
21733 long_long_unsigned_type_node,
21734 long_long_unsigned_type_node,
21735 integer_type_node, NULL_TREE);
21737 tree void_ftype_int
21738 = build_function_type_list (void_type_node,
21739 integer_type_node, NULL_TREE);
21741 tree v8qi_ftype_char
21742 = build_function_type_list (V8QI_type_node,
21743 signed_char_type_node, NULL_TREE);
21745 tree v4hi_ftype_short
21746 = build_function_type_list (V4HI_type_node,
21747 short_integer_type_node, NULL_TREE);
21749 tree v2si_ftype_int
21750 = build_function_type_list (V2SI_type_node,
21751 integer_type_node, NULL_TREE);
21753 /* Normal vector binops. */
21754 tree v8qi_ftype_v8qi_v8qi
21755 = build_function_type_list (V8QI_type_node,
21756 V8QI_type_node, V8QI_type_node, NULL_TREE);
21757 tree v4hi_ftype_v4hi_v4hi
21758 = build_function_type_list (V4HI_type_node,
21759 V4HI_type_node, V4HI_type_node, NULL_TREE);
21760 tree v2si_ftype_v2si_v2si
21761 = build_function_type_list (V2SI_type_node,
21762 V2SI_type_node, V2SI_type_node, NULL_TREE);
21763 tree di_ftype_di_di
21764 = build_function_type_list (long_long_unsigned_type_node,
21765 long_long_unsigned_type_node,
21766 long_long_unsigned_type_node,
21767 NULL_TREE);
21769 /* Add all builtins that are more or less simple operations on two
21770 operands. */
21771 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21773 /* Use one of the operands; the target can have a different mode for
21774 mask-generating compares. */
21775 enum machine_mode mode;
21776 tree type;
21778 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
21779 continue;
21781 mode = insn_data[d->icode].operand[1].mode;
21783 switch (mode)
21785 case V8QImode:
21786 type = v8qi_ftype_v8qi_v8qi;
21787 break;
21788 case V4HImode:
21789 type = v4hi_ftype_v4hi_v4hi;
21790 break;
21791 case V2SImode:
21792 type = v2si_ftype_v2si_v2si;
21793 break;
21794 case DImode:
21795 type = di_ftype_di_di;
21796 break;
21798 default:
21799 gcc_unreachable ();
21802 def_mbuiltin (d->mask, d->name, type, d->code);
21805 /* Add the remaining MMX insns with somewhat more complicated types. */
21806 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
21807 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
21808 ARM_BUILTIN_ ## CODE)
21810 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
21811 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
21812 ARM_BUILTIN_ ## CODE)
21814 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
21815 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
21816 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
21817 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
21818 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
21819 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
21820 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
21821 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
21822 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
21824 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
21825 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
21826 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
21827 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
21828 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
21829 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
21831 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
21832 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
21833 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
21834 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
21835 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
21836 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
21838 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
21839 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
21840 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
21841 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
21842 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
21843 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
21845 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
21846 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
21847 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
21848 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
21849 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
21850 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
21852 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
21854 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
21855 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
21856 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
21857 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
21858 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
21859 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
21860 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
21861 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
21862 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
21863 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
21865 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
21866 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
21867 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
21868 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
21869 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
21870 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
21871 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
21872 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
21873 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
21875 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
21876 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
21877 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
21879 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
21880 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
21881 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
21883 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
21884 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
21886 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
21887 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
21888 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
21889 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
21890 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
21891 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
21893 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
21894 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
21895 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
21896 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
21897 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
21898 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
21899 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
21900 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
21901 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
21902 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
21903 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
21904 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
21906 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
21907 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
21908 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
21909 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
21911 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
21912 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
21913 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
21914 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
21915 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
21916 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
21917 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
21919 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
21920 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
21921 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
21923 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
21924 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
21925 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
21926 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
21928 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
21929 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
21930 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
21931 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
21933 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
21934 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
21935 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
21936 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
21938 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
21939 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
21940 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
21941 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
21943 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
21944 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
21945 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
21946 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
21948 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
21949 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
21950 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
21951 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
21953 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
21955 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
21956 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
21957 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
21959 #undef iwmmx_mbuiltin
21960 #undef iwmmx2_mbuiltin
21963 static void
21964 arm_init_fp16_builtins (void)
21966 tree fp16_type = make_node (REAL_TYPE);
21967 TYPE_PRECISION (fp16_type) = 16;
21968 layout_type (fp16_type);
21969 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
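/* Note (added for clarity; not in the original source): this only makes
   the 16-bit scalar type visible to the front end under the name
   "__fp16".  The storage format (IEEE versus the ARM alternative
   format) is chosen separately through arm_fp16_format, and arithmetic
   on __fp16 values is performed by promoting to float, as implemented
   by arm_promoted_type below.  */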
21972 static void
21973 arm_init_crc32_builtins ()
21975 tree si_ftype_si_qi
21976 = build_function_type_list (unsigned_intSI_type_node,
21977 unsigned_intSI_type_node,
21978 unsigned_intQI_type_node, NULL_TREE);
21979 tree si_ftype_si_hi
21980 = build_function_type_list (unsigned_intSI_type_node,
21981 unsigned_intSI_type_node,
21982 unsigned_intHI_type_node, NULL_TREE);
21983 tree si_ftype_si_si
21984 = build_function_type_list (unsigned_intSI_type_node,
21985 unsigned_intSI_type_node,
21986 unsigned_intSI_type_node, NULL_TREE);
21988 arm_builtin_decls[ARM_BUILTIN_CRC32B]
21989 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
21990 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
21991 arm_builtin_decls[ARM_BUILTIN_CRC32H]
21992 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
21993 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
21994 arm_builtin_decls[ARM_BUILTIN_CRC32W]
21995 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
21996 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
21997 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
21998 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
21999 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
22000 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
22001 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
22002 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
22003 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
22004 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
22005 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
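/* Usage sketch (added for illustration; not in the original source):
   when CRC32 support is enabled (TARGET_CRC32), these builtins can be
   called directly from C, e.g.

     unsigned int acc = 0xffffffffu;
     acc = __builtin_arm_crc32b (acc, (unsigned char) 0x42);
     acc = __builtin_arm_crc32w (acc, 0xdeadbeefu);

   Each takes the running CRC as an unsigned int and the new data as an
   unsigned 8-, 16- or 32-bit value, and returns the updated CRC.  */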
22008 static void
22009 arm_init_builtins (void)
22011 if (TARGET_REALLY_IWMMXT)
22012 arm_init_iwmmxt_builtins ();
22014 if (TARGET_NEON)
22015 arm_init_neon_builtins ();
22017 if (arm_fp16_format)
22018 arm_init_fp16_builtins ();
22020 if (TARGET_CRC32)
22021 arm_init_crc32_builtins ();
22024 /* Return the ARM builtin for CODE. */
22026 static tree
22027 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
22029 if (code >= ARM_BUILTIN_MAX)
22030 return error_mark_node;
22032 return arm_builtin_decls[code];
22035 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
22037 static const char *
22038 arm_invalid_parameter_type (const_tree t)
22040 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
22041 return N_("function parameters cannot have __fp16 type");
22042 return NULL;
22045 /* Implement TARGET_INVALID_RETURN_TYPE. */
22047 static const char *
22048 arm_invalid_return_type (const_tree t)
22050 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
22051 return N_("functions cannot return __fp16 type");
22052 return NULL;
22055 /* Implement TARGET_PROMOTED_TYPE. */
22057 static tree
22058 arm_promoted_type (const_tree t)
22060 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
22061 return float_type_node;
22062 return NULL_TREE;
22065 /* Implement TARGET_CONVERT_TO_TYPE.
22066 Specifically, this hook implements the peculiarity of the ARM
22067 half-precision floating-point C semantics that requires conversions
22068 between __fp16 and double to go through an intermediate conversion to float. */
22070 static tree
22071 arm_convert_to_type (tree type, tree expr)
22073 tree fromtype = TREE_TYPE (expr);
22074 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
22075 return NULL_TREE;
22076 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
22077 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
22078 return convert (type, convert (float_type_node, expr));
22079 return NULL_TREE;
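/* Worked example (added for clarity; not in the original source): for
   __fp16 h and double d, this hook makes the front end expand

     d = h;   as   d = (double) (float) h;
     h = d;   as   h = (__fp16) (float) d;

   Conversions between __fp16 and float themselves are left to the
   default machinery, since NULL_TREE is returned for them.  */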
22082 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
22083 This simply adds HFmode as a supported mode; even though we don't
22084 implement arithmetic on this type directly, it's supported by
22085 optabs conversions, much the way the double-word arithmetic is
22086 special-cased in the default hook. */
22088 static bool
22089 arm_scalar_mode_supported_p (enum machine_mode mode)
22091 if (mode == HFmode)
22092 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
22093 else if (ALL_FIXED_POINT_MODE_P (mode))
22094 return true;
22095 else
22096 return default_scalar_mode_supported_p (mode);
22099 /* Errors in the source file can cause expand_expr to return const0_rtx
22100 where we expect a vector. To avoid crashing, use one of the vector
22101 clear instructions. */
22103 static rtx
22104 safe_vector_operand (rtx x, enum machine_mode mode)
22106 if (x != const0_rtx)
22107 return x;
22108 x = gen_reg_rtx (mode);
22110 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
22111 : gen_rtx_SUBREG (DImode, x, 0)));
22112 return x;
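/* Note (added for clarity; not in the original source): the replacement
   register is zeroed with the iWMMXt clear instruction; for vector
   modes the DImode clear is applied to a DImode subreg of the new
   register so that the whole 64-bit register is cleared.  */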
22115 /* Function to expand ternary builtins. */
22116 static rtx
22117 arm_expand_ternop_builtin (enum insn_code icode,
22118 tree exp, rtx target)
22120 rtx pat;
22121 tree arg0 = CALL_EXPR_ARG (exp, 0);
22122 tree arg1 = CALL_EXPR_ARG (exp, 1);
22123 tree arg2 = CALL_EXPR_ARG (exp, 2);
22125 rtx op0 = expand_normal (arg0);
22126 rtx op1 = expand_normal (arg1);
22127 rtx op2 = expand_normal (arg2);
22128 rtx op3 = NULL_RTX;
22130 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
22131 lane operand depending on endianness. */
22132 bool builtin_sha1cpm_p = false;
22134 if (insn_data[icode].n_operands == 5)
22136 gcc_assert (icode == CODE_FOR_crypto_sha1c
22137 || icode == CODE_FOR_crypto_sha1p
22138 || icode == CODE_FOR_crypto_sha1m);
22139 builtin_sha1cpm_p = true;
22141 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22142 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22143 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
22144 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
22147 if (VECTOR_MODE_P (mode0))
22148 op0 = safe_vector_operand (op0, mode0);
22149 if (VECTOR_MODE_P (mode1))
22150 op1 = safe_vector_operand (op1, mode1);
22151 if (VECTOR_MODE_P (mode2))
22152 op2 = safe_vector_operand (op2, mode2);
22154 if (! target
22155 || GET_MODE (target) != tmode
22156 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22157 target = gen_reg_rtx (tmode);
22159 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
22160 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
22161 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
22163 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22164 op0 = copy_to_mode_reg (mode0, op0);
22165 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22166 op1 = copy_to_mode_reg (mode1, op1);
22167 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
22168 op2 = copy_to_mode_reg (mode2, op2);
22169 if (builtin_sha1cpm_p)
22170 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
22172 if (builtin_sha1cpm_p)
22173 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
22174 else
22175 pat = GEN_FCN (icode) (target, op0, op1, op2);
22176 if (! pat)
22177 return 0;
22178 emit_insn (pat);
22179 return target;
22182 /* Subroutine of arm_expand_builtin to take care of binop insns. */
22184 static rtx
22185 arm_expand_binop_builtin (enum insn_code icode,
22186 tree exp, rtx target)
22188 rtx pat;
22189 tree arg0 = CALL_EXPR_ARG (exp, 0);
22190 tree arg1 = CALL_EXPR_ARG (exp, 1);
22191 rtx op0 = expand_normal (arg0);
22192 rtx op1 = expand_normal (arg1);
22193 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22194 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22195 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
22197 if (VECTOR_MODE_P (mode0))
22198 op0 = safe_vector_operand (op0, mode0);
22199 if (VECTOR_MODE_P (mode1))
22200 op1 = safe_vector_operand (op1, mode1);
22202 if (! target
22203 || GET_MODE (target) != tmode
22204 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22205 target = gen_reg_rtx (tmode);
22207 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
22208 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
22210 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22211 op0 = copy_to_mode_reg (mode0, op0);
22212 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22213 op1 = copy_to_mode_reg (mode1, op1);
22215 pat = GEN_FCN (icode) (target, op0, op1);
22216 if (! pat)
22217 return 0;
22218 emit_insn (pat);
22219 return target;
22222 /* Subroutine of arm_expand_builtin to take care of unop insns. */
22224 static rtx
22225 arm_expand_unop_builtin (enum insn_code icode,
22226 tree exp, rtx target, int do_load)
22228 rtx pat;
22229 tree arg0 = CALL_EXPR_ARG (exp, 0);
22230 rtx op0 = expand_normal (arg0);
22231 rtx op1 = NULL_RTX;
22232 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22233 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
22234 bool builtin_sha1h_p = false;
22236 if (insn_data[icode].n_operands == 3)
22238 gcc_assert (icode == CODE_FOR_crypto_sha1h);
22239 builtin_sha1h_p = true;
22242 if (! target
22243 || GET_MODE (target) != tmode
22244 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22245 target = gen_reg_rtx (tmode);
22246 if (do_load)
22247 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
22248 else
22250 if (VECTOR_MODE_P (mode0))
22251 op0 = safe_vector_operand (op0, mode0);
22253 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22254 op0 = copy_to_mode_reg (mode0, op0);
22256 if (builtin_sha1h_p)
22257 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
22259 if (builtin_sha1h_p)
22260 pat = GEN_FCN (icode) (target, op0, op1);
22261 else
22262 pat = GEN_FCN (icode) (target, op0);
22263 if (! pat)
22264 return 0;
22265 emit_insn (pat);
22266 return target;
22269 typedef enum {
22270 NEON_ARG_COPY_TO_REG,
22271 NEON_ARG_CONSTANT,
22272 NEON_ARG_MEMORY,
22273 NEON_ARG_STOP
22274 } builtin_arg;
22276 #define NEON_MAX_BUILTIN_ARGS 5
22278 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
22279 and return an expression for the accessed memory.
22281 The intrinsic function operates on a block of registers that has
22282 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
22283 function references the memory at EXP of type TYPE and in mode
22284 MEM_MODE; this mode may be BLKmode if no more suitable mode is
22285 available. */
22287 static tree
22288 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
22289 enum machine_mode reg_mode,
22290 neon_builtin_type_mode type_mode)
22292 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
22293 tree elem_type, upper_bound, array_type;
22295 /* Work out the size of the register block in bytes. */
22296 reg_size = GET_MODE_SIZE (reg_mode);
22298 /* Work out the size of each vector in bytes. */
22299 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
22300 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
22302 /* Work out how many vectors there are. */
22303 gcc_assert (reg_size % vector_size == 0);
22304 nvectors = reg_size / vector_size;
22306 /* Work out the type of each element. */
22307 gcc_assert (POINTER_TYPE_P (type));
22308 elem_type = TREE_TYPE (type);
22310 /* Work out how many elements are being loaded or stored.
22311 MEM_MODE == REG_MODE implies a one-to-one mapping between register
22312 and memory elements; anything else implies a lane load or store. */
22313 if (mem_mode == reg_mode)
22314 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
22315 else
22316 nelems = nvectors;
22318 /* Create a type that describes the full access. */
22319 upper_bound = build_int_cst (size_type_node, nelems - 1);
22320 array_type = build_array_type (elem_type, build_index_type (upper_bound));
22322 /* Dereference EXP using that type. */
22323 return fold_build2 (MEM_REF, array_type, exp,
22324 build_int_cst (build_pointer_type (array_type), 0));
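/* Worked example (added for clarity; not in the original source): for a
   plain quadword load such as vld1q_u8, REG_MODE is the 16-byte vector
   mode, so vector_size == 16 and nvectors == 1; MEM_MODE equals
   REG_MODE, so nelems == 16 / sizeof (uint8_t) == 16 and EXP is
   dereferenced as if it pointed to uint8_t[16].  For a lane variant,
   MEM_MODE differs from REG_MODE and only one element per vector is
   accessed, giving uint8_t[1] in this case.  */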
22327 /* Expand a Neon builtin. */
22328 static rtx
22329 arm_expand_neon_args (rtx target, int icode, int have_retval,
22330 neon_builtin_type_mode type_mode,
22331 tree exp, int fcode, ...)
22333 va_list ap;
22334 rtx pat;
22335 tree arg[NEON_MAX_BUILTIN_ARGS];
22336 rtx op[NEON_MAX_BUILTIN_ARGS];
22337 tree arg_type;
22338 tree formals;
22339 enum machine_mode tmode = insn_data[icode].operand[0].mode;
22340 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
22341 enum machine_mode other_mode;
22342 int argc = 0;
22343 int opno;
22345 if (have_retval
22346 && (!target
22347 || GET_MODE (target) != tmode
22348 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
22349 target = gen_reg_rtx (tmode);
22351 va_start (ap, fcode);
22353 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
22355 for (;;)
22357 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
22359 if (thisarg == NEON_ARG_STOP)
22360 break;
22361 else
22363 opno = argc + have_retval;
22364 mode[argc] = insn_data[icode].operand[opno].mode;
22365 arg[argc] = CALL_EXPR_ARG (exp, argc);
22366 arg_type = TREE_VALUE (formals);
22367 if (thisarg == NEON_ARG_MEMORY)
22369 other_mode = insn_data[icode].operand[1 - opno].mode;
22370 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
22371 mode[argc], other_mode,
22372 type_mode);
22375 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure a MEM_P
22376 is returned. */
22377 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
22378 (thisarg == NEON_ARG_MEMORY
22379 ? EXPAND_MEMORY : EXPAND_NORMAL));
22381 switch (thisarg)
22383 case NEON_ARG_COPY_TO_REG:
22384 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
22385 if (!(*insn_data[icode].operand[opno].predicate)
22386 (op[argc], mode[argc]))
22387 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
22388 break;
22390 case NEON_ARG_CONSTANT:
22391 /* FIXME: This error message is somewhat unhelpful. */
22392 if (!(*insn_data[icode].operand[opno].predicate)
22393 (op[argc], mode[argc]))
22394 error ("argument must be a constant");
22395 break;
22397 case NEON_ARG_MEMORY:
22398 /* Check if expand failed. */
22399 if (op[argc] == const0_rtx)
22400 return 0;
22401 gcc_assert (MEM_P (op[argc]));
22402 PUT_MODE (op[argc], mode[argc]);
22403 /* ??? arm_neon.h uses the same built-in functions for signed
22404 and unsigned accesses, casting where necessary. This isn't
22405 alias safe. */
22406 set_mem_alias_set (op[argc], 0);
22407 if (!(*insn_data[icode].operand[opno].predicate)
22408 (op[argc], mode[argc]))
22409 op[argc] = (replace_equiv_address
22410 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
22411 break;
22413 case NEON_ARG_STOP:
22414 gcc_unreachable ();
22417 argc++;
22418 formals = TREE_CHAIN (formals);
22422 va_end (ap);
22424 if (have_retval)
22425 switch (argc)
22427 case 1:
22428 pat = GEN_FCN (icode) (target, op[0]);
22429 break;
22431 case 2:
22432 pat = GEN_FCN (icode) (target, op[0], op[1]);
22433 break;
22435 case 3:
22436 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
22437 break;
22439 case 4:
22440 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
22441 break;
22443 case 5:
22444 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
22445 break;
22447 default:
22448 gcc_unreachable ();
22450 else
22451 switch (argc)
22453 case 1:
22454 pat = GEN_FCN (icode) (op[0]);
22455 break;
22457 case 2:
22458 pat = GEN_FCN (icode) (op[0], op[1]);
22459 break;
22461 case 3:
22462 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
22463 break;
22465 case 4:
22466 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
22467 break;
22469 case 5:
22470 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
22471 break;
22473 default:
22474 gcc_unreachable ();
22477 if (!pat)
22478 return 0;
22480 emit_insn (pat);
22482 return target;
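/* Note (added for clarity; not in the original source): each
   builtin_arg code in the variable argument list describes how the
   corresponding call argument is prepared (copied into a register,
   checked to be a constant, or dereferenced as a memory block), and the
   list is terminated by NEON_ARG_STOP; arm_expand_neon_builtin below
   supplies the per-itype argument lists.  */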
22485 /* Expand a Neon builtin. These are "special" because they don't have symbolic
22486 constants defined per-instruction or per instruction-variant. Instead, the
22487 required info is looked up in the table neon_builtin_data. */
22488 static rtx
22489 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
22491 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
22492 neon_itype itype = d->itype;
22493 enum insn_code icode = d->code;
22494 neon_builtin_type_mode type_mode = d->mode;
22496 switch (itype)
22498 case NEON_UNOP:
22499 case NEON_CONVERT:
22500 case NEON_DUPLANE:
22501 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22502 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
22504 case NEON_BINOP:
22505 case NEON_SETLANE:
22506 case NEON_SCALARMUL:
22507 case NEON_SCALARMULL:
22508 case NEON_SCALARMULH:
22509 case NEON_SHIFTINSERT:
22510 case NEON_LOGICBINOP:
22511 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22512 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22513 NEON_ARG_STOP);
22515 case NEON_TERNOP:
22516 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22517 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22518 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22520 case NEON_GETLANE:
22521 case NEON_FIXCONV:
22522 case NEON_SHIFTIMM:
22523 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22524 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
22525 NEON_ARG_STOP);
22527 case NEON_CREATE:
22528 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22529 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22531 case NEON_DUP:
22532 case NEON_RINT:
22533 case NEON_SPLIT:
22534 case NEON_FLOAT_WIDEN:
22535 case NEON_FLOAT_NARROW:
22536 case NEON_REINTERP:
22537 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22538 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22540 case NEON_COMBINE:
22541 case NEON_VTBL:
22542 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22543 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22545 case NEON_RESULTPAIR:
22546 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
22547 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22548 NEON_ARG_STOP);
22550 case NEON_LANEMUL:
22551 case NEON_LANEMULL:
22552 case NEON_LANEMULH:
22553 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22554 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22555 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22557 case NEON_LANEMAC:
22558 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22559 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22560 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
22562 case NEON_SHIFTACC:
22563 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22564 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22565 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22567 case NEON_SCALARMAC:
22568 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22569 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22570 NEON_ARG_CONSTANT, NEON_ARG_STOP);
22572 case NEON_SELECT:
22573 case NEON_VTBX:
22574 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22575 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
22576 NEON_ARG_STOP);
22578 case NEON_LOAD1:
22579 case NEON_LOADSTRUCT:
22580 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22581 NEON_ARG_MEMORY, NEON_ARG_STOP);
22583 case NEON_LOAD1LANE:
22584 case NEON_LOADSTRUCTLANE:
22585 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
22586 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22587 NEON_ARG_STOP);
22589 case NEON_STORE1:
22590 case NEON_STORESTRUCT:
22591 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
22592 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
22594 case NEON_STORE1LANE:
22595 case NEON_STORESTRUCTLANE:
22596 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
22597 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
22598 NEON_ARG_STOP);
22601 gcc_unreachable ();
22604 /* Emit code to reinterpret one Neon type as another, without altering bits. */
22605 void
22606 neon_reinterpret (rtx dest, rtx src)
22608 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
22611 /* Emit code to place a Neon pair result in memory locations (with equal
22612 registers). */
22613 void
22614 neon_emit_pair_result_insn (enum machine_mode mode,
22615 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
22616 rtx op1, rtx op2)
22618 rtx mem = gen_rtx_MEM (mode, destaddr);
22619 rtx tmp1 = gen_reg_rtx (mode);
22620 rtx tmp2 = gen_reg_rtx (mode);
22622 emit_insn (intfn (tmp1, op1, op2, tmp2));
22624 emit_move_insn (mem, tmp1);
22625 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
22626 emit_move_insn (mem, tmp2);
22629 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
22630 not to early-clobber SRC registers in the process.
22632 We assume that the operands described by SRC and DEST represent a
22633 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
22634 number of components into which the copy has been decomposed. */
22635 void
22636 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
22638 unsigned int i;
22640 if (!reg_overlap_mentioned_p (operands[0], operands[1])
22641 || REGNO (operands[0]) < REGNO (operands[1]))
22643 for (i = 0; i < count; i++)
22645 operands[2 * i] = dest[i];
22646 operands[2 * i + 1] = src[i];
22649 else
22651 for (i = 0; i < count; i++)
22653 operands[2 * i] = dest[count - i - 1];
22654 operands[2 * i + 1] = src[count - i - 1];
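/* Example (added for clarity; not in the original source): when the
   destination block overlaps the source and starts at a higher register
   number, copying components in ascending order could overwrite a
   source register before it has been read, so the moves are emitted in
   descending order instead; otherwise ascending order is safe.  */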
22659 /* Split operands into moves from op[1] + op[2] into op[0]. */
22661 void
22662 neon_split_vcombine (rtx operands[3])
22664 unsigned int dest = REGNO (operands[0]);
22665 unsigned int src1 = REGNO (operands[1]);
22666 unsigned int src2 = REGNO (operands[2]);
22667 enum machine_mode halfmode = GET_MODE (operands[1]);
22668 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
22669 rtx destlo, desthi;
22671 if (src1 == dest && src2 == dest + halfregs)
22673 /* No-op move. Can't split to nothing; emit something. */
22674 emit_note (NOTE_INSN_DELETED);
22675 return;
22678 /* Preserve register attributes for variable tracking. */
22679 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
22680 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
22681 GET_MODE_SIZE (halfmode));
22683 /* Special case of reversed high/low parts. Use VSWP. */
22684 if (src2 == dest && src1 == dest + halfregs)
22686 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
22687 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
22688 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
22689 return;
22692 if (!reg_overlap_mentioned_p (operands[2], destlo))
22694 /* Try to avoid unnecessary moves if part of the result
22695 is in the right place already. */
22696 if (src1 != dest)
22697 emit_move_insn (destlo, operands[1]);
22698 if (src2 != dest + halfregs)
22699 emit_move_insn (desthi, operands[2]);
22701 else
22703 if (src2 != dest + halfregs)
22704 emit_move_insn (desthi, operands[2]);
22705 if (src1 != dest)
22706 emit_move_insn (destlo, operands[1]);
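/* Note (added for clarity; not in the original source): a vcombine
   whose q-register destination already holds the two source d-registers
   needs no code (only a deleted-insn note is emitted); the exactly
   swapped layout is handled by the two-set PARALLEL above (a VSWP); and
   in the general case the two halves are moved in whichever order
   avoids clobbering a source register that overlaps the low half of the
   destination.  */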
22710 /* Expand an expression EXP that calls a built-in function,
22711 with result going to TARGET if that's convenient
22712 (and in mode MODE if that's convenient).
22713 SUBTARGET may be used as the target for computing one of EXP's operands.
22714 IGNORE is nonzero if the value is to be ignored. */
22716 static rtx
22717 arm_expand_builtin (tree exp,
22718 rtx target,
22719 rtx subtarget ATTRIBUTE_UNUSED,
22720 enum machine_mode mode ATTRIBUTE_UNUSED,
22721 int ignore ATTRIBUTE_UNUSED)
22723 const struct builtin_description * d;
22724 enum insn_code icode;
22725 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
22726 tree arg0;
22727 tree arg1;
22728 tree arg2;
22729 rtx op0;
22730 rtx op1;
22731 rtx op2;
22732 rtx pat;
22733 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
22734 size_t i;
22735 enum machine_mode tmode;
22736 enum machine_mode mode0;
22737 enum machine_mode mode1;
22738 enum machine_mode mode2;
22739 int opint;
22740 int selector;
22741 int mask;
22742 int imm;
22744 if (fcode >= ARM_BUILTIN_NEON_BASE)
22745 return arm_expand_neon_builtin (fcode, exp, target);
22747 switch (fcode)
22749 case ARM_BUILTIN_TEXTRMSB:
22750 case ARM_BUILTIN_TEXTRMUB:
22751 case ARM_BUILTIN_TEXTRMSH:
22752 case ARM_BUILTIN_TEXTRMUH:
22753 case ARM_BUILTIN_TEXTRMSW:
22754 case ARM_BUILTIN_TEXTRMUW:
22755 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
22756 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
22757 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
22758 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
22759 : CODE_FOR_iwmmxt_textrmw);
22761 arg0 = CALL_EXPR_ARG (exp, 0);
22762 arg1 = CALL_EXPR_ARG (exp, 1);
22763 op0 = expand_normal (arg0);
22764 op1 = expand_normal (arg1);
22765 tmode = insn_data[icode].operand[0].mode;
22766 mode0 = insn_data[icode].operand[1].mode;
22767 mode1 = insn_data[icode].operand[2].mode;
22769 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22770 op0 = copy_to_mode_reg (mode0, op0);
22771 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22773 /* @@@ better error message */
22774 error ("selector must be an immediate");
22775 return gen_reg_rtx (tmode);
22778 opint = INTVAL (op1);
22779 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
22781 if (opint > 7 || opint < 0)
22782 error ("the range of selector should be in 0 to 7");
22784 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
22786 if (opint > 3 || opint < 0)
22787 error ("the range of selector should be in 0 to 3");
22789 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
22791 if (opint > 1 || opint < 0)
22792 error ("the range of selector should be in 0 to 1");
22795 if (target == 0
22796 || GET_MODE (target) != tmode
22797 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22798 target = gen_reg_rtx (tmode);
22799 pat = GEN_FCN (icode) (target, op0, op1);
22800 if (! pat)
22801 return 0;
22802 emit_insn (pat);
22803 return target;
22805 case ARM_BUILTIN_WALIGNI:
22806 /* If op2 is immediate, call waligni, else call walignr. */
22807 arg0 = CALL_EXPR_ARG (exp, 0);
22808 arg1 = CALL_EXPR_ARG (exp, 1);
22809 arg2 = CALL_EXPR_ARG (exp, 2);
22810 op0 = expand_normal (arg0);
22811 op1 = expand_normal (arg1);
22812 op2 = expand_normal (arg2);
22813 if (CONST_INT_P (op2))
22815 icode = CODE_FOR_iwmmxt_waligni;
22816 tmode = insn_data[icode].operand[0].mode;
22817 mode0 = insn_data[icode].operand[1].mode;
22818 mode1 = insn_data[icode].operand[2].mode;
22819 mode2 = insn_data[icode].operand[3].mode;
22820 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22821 op0 = copy_to_mode_reg (mode0, op0);
22822 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22823 op1 = copy_to_mode_reg (mode1, op1);
22824 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
22825 selector = INTVAL (op2);
22826 if (selector > 7 || selector < 0)
22827 error ("the range of selector should be in 0 to 7");
22829 else
22831 icode = CODE_FOR_iwmmxt_walignr;
22832 tmode = insn_data[icode].operand[0].mode;
22833 mode0 = insn_data[icode].operand[1].mode;
22834 mode1 = insn_data[icode].operand[2].mode;
22835 mode2 = insn_data[icode].operand[3].mode;
22836 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
22837 op0 = copy_to_mode_reg (mode0, op0);
22838 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
22839 op1 = copy_to_mode_reg (mode1, op1);
22840 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
22841 op2 = copy_to_mode_reg (mode2, op2);
22843 if (target == 0
22844 || GET_MODE (target) != tmode
22845 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
22846 target = gen_reg_rtx (tmode);
22847 pat = GEN_FCN (icode) (target, op0, op1, op2);
22848 if (!pat)
22849 return 0;
22850 emit_insn (pat);
22851 return target;
22853 case ARM_BUILTIN_TINSRB:
22854 case ARM_BUILTIN_TINSRH:
22855 case ARM_BUILTIN_TINSRW:
22856 case ARM_BUILTIN_WMERGE:
22857 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
22858 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
22859 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
22860 : CODE_FOR_iwmmxt_tinsrw);
22861 arg0 = CALL_EXPR_ARG (exp, 0);
22862 arg1 = CALL_EXPR_ARG (exp, 1);
22863 arg2 = CALL_EXPR_ARG (exp, 2);
22864 op0 = expand_normal (arg0);
22865 op1 = expand_normal (arg1);
22866 op2 = expand_normal (arg2);
22867 tmode = insn_data[icode].operand[0].mode;
22868 mode0 = insn_data[icode].operand[1].mode;
22869 mode1 = insn_data[icode].operand[2].mode;
22870 mode2 = insn_data[icode].operand[3].mode;
22872 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
22873 op0 = copy_to_mode_reg (mode0, op0);
22874 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
22875 op1 = copy_to_mode_reg (mode1, op1);
22876 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
22878 error ("selector must be an immediate");
22879 return const0_rtx;
22881 if (icode == CODE_FOR_iwmmxt_wmerge)
22883 selector = INTVAL (op2);
22884 if (selector > 7 || selector < 0)
22885 error ("the range of selector should be in 0 to 7");
22887 if ((icode == CODE_FOR_iwmmxt_tinsrb)
22888 || (icode == CODE_FOR_iwmmxt_tinsrh)
22889 || (icode == CODE_FOR_iwmmxt_tinsrw))
22891 mask = 0x01;
22892 selector = INTVAL (op2);
22893 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
22894 error ("the range of selector should be in 0 to 7");
22895 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
22896 error ("the range of selector should be in 0 to 3");
22897 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
22898 error ("the range of selector should be in 0 to 1");
22899 mask <<= selector;
22900 op2 = GEN_INT (mask);
22902 if (target == 0
22903 || GET_MODE (target) != tmode
22904 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22905 target = gen_reg_rtx (tmode);
22906 pat = GEN_FCN (icode) (target, op0, op1, op2);
22907 if (! pat)
22908 return 0;
22909 emit_insn (pat);
22910 return target;
22912 case ARM_BUILTIN_SETWCGR0:
22913 case ARM_BUILTIN_SETWCGR1:
22914 case ARM_BUILTIN_SETWCGR2:
22915 case ARM_BUILTIN_SETWCGR3:
22916 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
22917 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
22918 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
22919 : CODE_FOR_iwmmxt_setwcgr3);
22920 arg0 = CALL_EXPR_ARG (exp, 0);
22921 op0 = expand_normal (arg0);
22922 mode0 = insn_data[icode].operand[0].mode;
22923 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
22924 op0 = copy_to_mode_reg (mode0, op0);
22925 pat = GEN_FCN (icode) (op0);
22926 if (!pat)
22927 return 0;
22928 emit_insn (pat);
22929 return 0;
22931 case ARM_BUILTIN_GETWCGR0:
22932 case ARM_BUILTIN_GETWCGR1:
22933 case ARM_BUILTIN_GETWCGR2:
22934 case ARM_BUILTIN_GETWCGR3:
22935 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
22936 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
22937 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
22938 : CODE_FOR_iwmmxt_getwcgr3);
22939 tmode = insn_data[icode].operand[0].mode;
22940 if (target == 0
22941 || GET_MODE (target) != tmode
22942 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
22943 target = gen_reg_rtx (tmode);
22944 pat = GEN_FCN (icode) (target);
22945 if (!pat)
22946 return 0;
22947 emit_insn (pat);
22948 return target;
22950 case ARM_BUILTIN_WSHUFH:
22951 icode = CODE_FOR_iwmmxt_wshufh;
22952 arg0 = CALL_EXPR_ARG (exp, 0);
22953 arg1 = CALL_EXPR_ARG (exp, 1);
22954 op0 = expand_normal (arg0);
22955 op1 = expand_normal (arg1);
22956 tmode = insn_data[icode].operand[0].mode;
22957 mode1 = insn_data[icode].operand[1].mode;
22958 mode2 = insn_data[icode].operand[2].mode;
22960 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
22961 op0 = copy_to_mode_reg (mode1, op0);
22962 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
22964 error ("mask must be an immediate");
22965 return const0_rtx;
22967 selector = INTVAL (op1);
22968 if (selector < 0 || selector > 255)
22969 error ("the range of mask should be in 0 to 255");
22970 if (target == 0
22971 || GET_MODE (target) != tmode
22972 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
22973 target = gen_reg_rtx (tmode);
22974 pat = GEN_FCN (icode) (target, op0, op1);
22975 if (! pat)
22976 return 0;
22977 emit_insn (pat);
22978 return target;
22980 case ARM_BUILTIN_WMADDS:
22981 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
22982 case ARM_BUILTIN_WMADDSX:
22983 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
22984 case ARM_BUILTIN_WMADDSN:
22985 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
22986 case ARM_BUILTIN_WMADDU:
22987 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
22988 case ARM_BUILTIN_WMADDUX:
22989 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
22990 case ARM_BUILTIN_WMADDUN:
22991 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
22992 case ARM_BUILTIN_WSADBZ:
22993 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
22994 case ARM_BUILTIN_WSADHZ:
22995 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
22997 /* Several three-argument builtins. */
22998 case ARM_BUILTIN_WMACS:
22999 case ARM_BUILTIN_WMACU:
23000 case ARM_BUILTIN_TMIA:
23001 case ARM_BUILTIN_TMIAPH:
23002 case ARM_BUILTIN_TMIATT:
23003 case ARM_BUILTIN_TMIATB:
23004 case ARM_BUILTIN_TMIABT:
23005 case ARM_BUILTIN_TMIABB:
23006 case ARM_BUILTIN_WQMIABB:
23007 case ARM_BUILTIN_WQMIABT:
23008 case ARM_BUILTIN_WQMIATB:
23009 case ARM_BUILTIN_WQMIATT:
23010 case ARM_BUILTIN_WQMIABBN:
23011 case ARM_BUILTIN_WQMIABTN:
23012 case ARM_BUILTIN_WQMIATBN:
23013 case ARM_BUILTIN_WQMIATTN:
23014 case ARM_BUILTIN_WMIABB:
23015 case ARM_BUILTIN_WMIABT:
23016 case ARM_BUILTIN_WMIATB:
23017 case ARM_BUILTIN_WMIATT:
23018 case ARM_BUILTIN_WMIABBN:
23019 case ARM_BUILTIN_WMIABTN:
23020 case ARM_BUILTIN_WMIATBN:
23021 case ARM_BUILTIN_WMIATTN:
23022 case ARM_BUILTIN_WMIAWBB:
23023 case ARM_BUILTIN_WMIAWBT:
23024 case ARM_BUILTIN_WMIAWTB:
23025 case ARM_BUILTIN_WMIAWTT:
23026 case ARM_BUILTIN_WMIAWBBN:
23027 case ARM_BUILTIN_WMIAWBTN:
23028 case ARM_BUILTIN_WMIAWTBN:
23029 case ARM_BUILTIN_WMIAWTTN:
23030 case ARM_BUILTIN_WSADB:
23031 case ARM_BUILTIN_WSADH:
23032 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
23033 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
23034 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
23035 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
23036 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
23037 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
23038 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
23039 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
23040 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
23041 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
23042 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
23043 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
23044 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
23045 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
23046 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
23047 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
23048 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
23049 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
23050 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
23051 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
23052 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
23053 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
23054 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
23055 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
23056 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
23057 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
23058 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
23059 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
23060 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
23061 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
23062 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
23063 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
23064 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
23065 : CODE_FOR_iwmmxt_wsadh);
23066 arg0 = CALL_EXPR_ARG (exp, 0);
23067 arg1 = CALL_EXPR_ARG (exp, 1);
23068 arg2 = CALL_EXPR_ARG (exp, 2);
23069 op0 = expand_normal (arg0);
23070 op1 = expand_normal (arg1);
23071 op2 = expand_normal (arg2);
23072 tmode = insn_data[icode].operand[0].mode;
23073 mode0 = insn_data[icode].operand[1].mode;
23074 mode1 = insn_data[icode].operand[2].mode;
23075 mode2 = insn_data[icode].operand[3].mode;
23077 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23078 op0 = copy_to_mode_reg (mode0, op0);
23079 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
23080 op1 = copy_to_mode_reg (mode1, op1);
23081 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
23082 op2 = copy_to_mode_reg (mode2, op2);
23083 if (target == 0
23084 || GET_MODE (target) != tmode
23085 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23086 target = gen_reg_rtx (tmode);
23087 pat = GEN_FCN (icode) (target, op0, op1, op2);
23088 if (! pat)
23089 return 0;
23090 emit_insn (pat);
23091 return target;
23093 case ARM_BUILTIN_WZERO:
23094 target = gen_reg_rtx (DImode);
23095 emit_insn (gen_iwmmxt_clrdi (target));
23096 return target;
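/* WZERO takes no arguments: the expansion above just emits iwmmxt_clrdi to
   clear a fresh DImode register and returns that register as the result. */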
23098 case ARM_BUILTIN_WSRLHI:
23099 case ARM_BUILTIN_WSRLWI:
23100 case ARM_BUILTIN_WSRLDI:
23101 case ARM_BUILTIN_WSLLHI:
23102 case ARM_BUILTIN_WSLLWI:
23103 case ARM_BUILTIN_WSLLDI:
23104 case ARM_BUILTIN_WSRAHI:
23105 case ARM_BUILTIN_WSRAWI:
23106 case ARM_BUILTIN_WSRADI:
23107 case ARM_BUILTIN_WRORHI:
23108 case ARM_BUILTIN_WRORWI:
23109 case ARM_BUILTIN_WRORDI:
23110 case ARM_BUILTIN_WSRLH:
23111 case ARM_BUILTIN_WSRLW:
23112 case ARM_BUILTIN_WSRLD:
23113 case ARM_BUILTIN_WSLLH:
23114 case ARM_BUILTIN_WSLLW:
23115 case ARM_BUILTIN_WSLLD:
23116 case ARM_BUILTIN_WSRAH:
23117 case ARM_BUILTIN_WSRAW:
23118 case ARM_BUILTIN_WSRAD:
23119 case ARM_BUILTIN_WRORH:
23120 case ARM_BUILTIN_WRORW:
23121 case ARM_BUILTIN_WRORD:
23122 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
23123 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
23124 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
23125 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
23126 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
23127 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
23128 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
23129 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
23130 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
23131 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
23132 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
23133 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
23134 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
23135 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
23136 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
23137 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
23138 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
23139 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
23140 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
23141 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
23142 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
23143 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
23144 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
23145 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
23146 : CODE_FOR_nothing);
23147 arg1 = CALL_EXPR_ARG (exp, 1);
23148 op1 = expand_normal (arg1);
23149 if (GET_MODE (op1) == VOIDmode)
23151 imm = INTVAL (op1);
23152 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
23153 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
23154 && (imm < 0 || imm > 32))
23156 if (fcode == ARM_BUILTIN_WRORHI)
23157 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
23158 else if (fcode == ARM_BUILTIN_WRORWI)
23159 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
23160 else if (fcode == ARM_BUILTIN_WRORH)
23161 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
23162 else
23163 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
23165 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
23166 && (imm < 0 || imm > 64))
23168 if (fcode == ARM_BUILTIN_WRORDI)
23169 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
23170 else
23171 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
23173 else if (imm < 0)
23175 if (fcode == ARM_BUILTIN_WSRLHI)
23176 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
23177 else if (fcode == ARM_BUILTIN_WSRLWI)
23178 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
23179 else if (fcode == ARM_BUILTIN_WSRLDI)
23180 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
23181 else if (fcode == ARM_BUILTIN_WSLLHI)
23182 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
23183 else if (fcode == ARM_BUILTIN_WSLLWI)
23184 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
23185 else if (fcode == ARM_BUILTIN_WSLLDI)
23186 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
23187 else if (fcode == ARM_BUILTIN_WSRAHI)
23188 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
23189 else if (fcode == ARM_BUILTIN_WSRAWI)
23190 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
23191 else if (fcode == ARM_BUILTIN_WSRADI)
23192 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
23193 else if (fcode == ARM_BUILTIN_WSRLH)
23194 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
23195 else if (fcode == ARM_BUILTIN_WSRLW)
23196 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
23197 else if (fcode == ARM_BUILTIN_WSRLD)
23198 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
23199 else if (fcode == ARM_BUILTIN_WSLLH)
23200 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
23201 else if (fcode == ARM_BUILTIN_WSLLW)
23202 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
23203 else if (fcode == ARM_BUILTIN_WSLLD)
23204 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
23205 else if (fcode == ARM_BUILTIN_WSRAH)
23206 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
23207 else if (fcode == ARM_BUILTIN_WSRAW)
23208 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
23209 else
23210 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
23213 return arm_expand_binop_builtin (icode, exp, target);
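/* Note that these range checks only fire when the count is a compile-time
   constant (VOIDmode above); for example a negative constant count passed
   to _mm_srli_pi16 is diagnosed here at expansion time, while a count held
   in a variable skips the checks and goes straight to the binop expansion. */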
23215 default:
23216 break;
23219 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
23220 if (d->code == (const enum arm_builtins) fcode)
23221 return arm_expand_binop_builtin (d->icode, exp, target);
23223 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
23224 if (d->code == (const enum arm_builtins) fcode)
23225 return arm_expand_unop_builtin (d->icode, exp, target, 0);
23227 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
23228 if (d->code == (const enum arm_builtins) fcode)
23229 return arm_expand_ternop_builtin (d->icode, exp, target);
23231 /* @@@ Should really do something sensible here. */
23232 return NULL_RTX;
23235 /* Return the number (counting from 0) of
23236 the least significant set bit in MASK. */
23238 inline static int
23239 number_of_first_bit_set (unsigned mask)
23241 return ctz_hwi (mask);
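/* For example, number_of_first_bit_set (0x0c) is 2, since bit 2 is the
   lowest bit set in 0b1100. */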
23244 /* Like emit_multi_reg_push, but allowing for a different set of
23245 registers to be described as saved. MASK is the set of registers
23246 to be saved; REAL_REGS is the set of registers to be described as
23247 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23249 static rtx
23250 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23252 unsigned long regno;
23253 rtx par[10], tmp, reg, insn;
23254 int i, j;
23256 /* Build the parallel of the registers actually being stored. */
23257 for (i = 0; mask; ++i, mask &= mask - 1)
23259 regno = ctz_hwi (mask);
23260 reg = gen_rtx_REG (SImode, regno);
23262 if (i == 0)
23263 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23264 else
23265 tmp = gen_rtx_USE (VOIDmode, reg);
23267 par[i] = tmp;
23270 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23271 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23272 tmp = gen_frame_mem (BLKmode, tmp);
23273 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23274 par[0] = tmp;
23276 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23277 insn = emit_insn (tmp);
23279 /* Always build the stack adjustment note for unwind info. */
23280 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23281 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23282 par[0] = tmp;
23284 /* Build the parallel of the registers recorded as saved for unwind. */
23285 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23287 regno = ctz_hwi (real_regs);
23288 reg = gen_rtx_REG (SImode, regno);
23290 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23291 tmp = gen_frame_mem (SImode, tmp);
23292 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23293 RTX_FRAME_RELATED_P (tmp) = 1;
23294 par[j + 1] = tmp;
23297 if (j == 0)
23298 tmp = par[0];
23299 else
23301 RTX_FRAME_RELATED_P (par[0]) = 1;
23302 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23305 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23307 return insn;
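/* Illustrative use (mirroring thumb1_expand_prologue below): when high
   registers r8 and r9 are parked in low registers r4 and r5 before being
   pushed, MASK would be (1 << 4) | (1 << 5) -- the registers actually
   stored -- while REAL_REGS would be (1 << 8) | (1 << 9), so the unwind
   info records the saved high registers rather than the scratch low ones. */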
23310 /* Emit code to pop registers from the stack. F is the
23311 assembly file. MASK is the registers to pop. */
23312 static void
23313 thumb_pop (FILE *f, unsigned long mask)
23315 int regno;
23316 int lo_mask = mask & 0xFF;
23317 int pushed_words = 0;
23319 gcc_assert (mask);
23321 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23323 /* Special case. Do not generate a POP PC statement here; do it in
23324 thumb_exit (). */
23325 thumb_exit (f, -1);
23326 return;
23329 fprintf (f, "\tpop\t{");
23331 /* Look at the low registers first. */
23332 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23334 if (lo_mask & 1)
23336 asm_fprintf (f, "%r", regno);
23338 if ((lo_mask & ~1) != 0)
23339 fprintf (f, ", ");
23341 pushed_words++;
23345 if (mask & (1 << PC_REGNUM))
23347 /* Catch popping the PC. */
23348 if (TARGET_INTERWORK || TARGET_BACKTRACE
23349 || crtl->calls_eh_return)
23351 /* The PC is never popped directly; instead
23352 it is popped into r3 and then BX is used. */
23353 fprintf (f, "}\n");
23355 thumb_exit (f, -1);
23357 return;
23359 else
23361 if (mask & 0xFF)
23362 fprintf (f, ", ");
23364 asm_fprintf (f, "%r", PC_REGNUM);
23368 fprintf (f, "}\n");
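/* For instance, a MASK of 0x009f produces "pop {r0, r1, r2, r3, r4, r7}",
   while a MASK containing only the PC bit defers to thumb_exit (below) so
   that interworking and EH returns are handled correctly. */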
23371 /* Generate code to return from a thumb function.
23372 If 'reg_containing_return_addr' is -1, then the return address is
23373 actually on the stack, at the stack pointer. */
23374 static void
23375 thumb_exit (FILE *f, int reg_containing_return_addr)
23377 unsigned regs_available_for_popping;
23378 unsigned regs_to_pop;
23379 int pops_needed;
23380 unsigned available;
23381 unsigned required;
23382 int mode;
23383 int size;
23384 int restore_a4 = FALSE;
23386 /* Compute the registers we need to pop. */
23387 regs_to_pop = 0;
23388 pops_needed = 0;
23390 if (reg_containing_return_addr == -1)
23392 regs_to_pop |= 1 << LR_REGNUM;
23393 ++pops_needed;
23396 if (TARGET_BACKTRACE)
23398 /* Restore the (ARM) frame pointer and stack pointer. */
23399 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23400 pops_needed += 2;
23403 /* If there is nothing to pop then just emit the BX instruction and
23404 return. */
23405 if (pops_needed == 0)
23407 if (crtl->calls_eh_return)
23408 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23410 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23411 return;
23413 /* Otherwise if we are not supporting interworking and we have not created
23414 a backtrace structure and the function was not entered in ARM mode then
23415 just pop the return address straight into the PC. */
23416 else if (!TARGET_INTERWORK
23417 && !TARGET_BACKTRACE
23418 && !is_called_in_ARM_mode (current_function_decl)
23419 && !crtl->calls_eh_return)
23421 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23422 return;
23425 /* Find out how many of the (return) argument registers we can corrupt. */
23426 regs_available_for_popping = 0;
23428 /* If returning via __builtin_eh_return, the bottom three registers
23429 all contain information needed for the return. */
23430 if (crtl->calls_eh_return)
23431 size = 12;
23432 else
23434 /* We can deduce the registers used from the function's
23435 return value. This is more reliable than examining
23436 df_regs_ever_live_p () because that will be set if the register is
23437 ever used in the function, not just if the register is used
23438 to hold a return value. */
23440 if (crtl->return_rtx != 0)
23441 mode = GET_MODE (crtl->return_rtx);
23442 else
23443 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23445 size = GET_MODE_SIZE (mode);
23447 if (size == 0)
23449 /* In a void function we can use any argument register.
23450 In a function that returns a structure on the stack
23451 we can use the second and third argument registers. */
23452 if (mode == VOIDmode)
23453 regs_available_for_popping =
23454 (1 << ARG_REGISTER (1))
23455 | (1 << ARG_REGISTER (2))
23456 | (1 << ARG_REGISTER (3));
23457 else
23458 regs_available_for_popping =
23459 (1 << ARG_REGISTER (2))
23460 | (1 << ARG_REGISTER (3));
23462 else if (size <= 4)
23463 regs_available_for_popping =
23464 (1 << ARG_REGISTER (2))
23465 | (1 << ARG_REGISTER (3));
23466 else if (size <= 8)
23467 regs_available_for_popping =
23468 (1 << ARG_REGISTER (3));
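/* For example (with ARG_REGISTER (1) being r0 and so on): a function
   returning a 4-byte int keeps its result in r0, so r1 and r2 may be
   corrupted here; an 8-byte result occupies r0-r1, leaving only r2. */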
23471 /* Match registers to be popped with registers into which we pop them. */
23472 for (available = regs_available_for_popping,
23473 required = regs_to_pop;
23474 required != 0 && available != 0;
23475 available &= ~(available & - available),
23476 required &= ~(required & - required))
23477 -- pops_needed;
23479 /* If we have any popping registers left over, remove them. */
23480 if (available > 0)
23481 regs_available_for_popping &= ~available;
23483 /* Otherwise if we need another popping register we can use
23484 the fourth argument register. */
23485 else if (pops_needed)
23487 /* If we have not found any free argument registers and
23488 reg a4 contains the return address, we must move it. */
23489 if (regs_available_for_popping == 0
23490 && reg_containing_return_addr == LAST_ARG_REGNUM)
23492 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23493 reg_containing_return_addr = LR_REGNUM;
23495 else if (size > 12)
23497 /* Register a4 is being used to hold part of the return value,
23498 but we have dire need of a free, low register. */
23499 restore_a4 = TRUE;
23501 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23504 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23506 /* The fourth argument register is available. */
23507 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23509 --pops_needed;
23513 /* Pop as many registers as we can. */
23514 thumb_pop (f, regs_available_for_popping);
23516 /* Process the registers we popped. */
23517 if (reg_containing_return_addr == -1)
23519 /* The return address was popped into the lowest numbered register. */
23520 regs_to_pop &= ~(1 << LR_REGNUM);
23522 reg_containing_return_addr =
23523 number_of_first_bit_set (regs_available_for_popping);
23525 /* Remove this register from the mask of available registers, so that
23526 the return address will not be corrupted by further pops. */
23527 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23530 /* If we popped other registers then handle them here. */
23531 if (regs_available_for_popping)
23533 int frame_pointer;
23535 /* Work out which register currently contains the frame pointer. */
23536 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23538 /* Move it into the correct place. */
23539 asm_fprintf (f, "\tmov\t%r, %r\n",
23540 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23542 /* (Temporarily) remove it from the mask of popped registers. */
23543 regs_available_for_popping &= ~(1 << frame_pointer);
23544 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23546 if (regs_available_for_popping)
23548 int stack_pointer;
23550 /* We popped the stack pointer as well,
23551 find the register that contains it. */
23552 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23554 /* Move it into the stack register. */
23555 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23557 /* At this point we have popped all necessary registers, so
23558 do not worry about restoring regs_available_for_popping
23559 to its correct value:
23561 assert (pops_needed == 0)
23562 assert (regs_available_for_popping == (1 << frame_pointer))
23563 assert (regs_to_pop == (1 << STACK_POINTER)) */
23565 else
23567 /* Since we have just moved the popped value into the frame
23568 pointer, the popping register is available for reuse, and
23569 we know that we still have the stack pointer left to pop. */
23570 regs_available_for_popping |= (1 << frame_pointer);
23574 /* If we still have registers left on the stack, but we no longer have
23575 any registers into which we can pop them, then we must move the return
23576 address into the link register and make available the register that
23577 contained it. */
23578 if (regs_available_for_popping == 0 && pops_needed > 0)
23580 regs_available_for_popping |= 1 << reg_containing_return_addr;
23582 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23583 reg_containing_return_addr);
23585 reg_containing_return_addr = LR_REGNUM;
23588 /* If we have registers left on the stack then pop some more.
23589 We know that at most we will want to pop FP and SP. */
23590 if (pops_needed > 0)
23592 int popped_into;
23593 int move_to;
23595 thumb_pop (f, regs_available_for_popping);
23597 /* We have popped either FP or SP.
23598 Move whichever one it is into the correct register. */
23599 popped_into = number_of_first_bit_set (regs_available_for_popping);
23600 move_to = number_of_first_bit_set (regs_to_pop);
23602 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23604 regs_to_pop &= ~(1 << move_to);
23606 --pops_needed;
23609 /* If we still have not popped everything then we must have only
23610 had one register available to us and we are now popping the SP. */
23611 if (pops_needed > 0)
23613 int popped_into;
23615 thumb_pop (f, regs_available_for_popping);
23617 popped_into = number_of_first_bit_set (regs_available_for_popping);
23619 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23620 /*
23621 assert (regs_to_pop == (1 << STACK_POINTER))
23622 assert (pops_needed == 1)
23623 */
23626 /* If necessary restore the a4 register. */
23627 if (restore_a4)
23629 if (reg_containing_return_addr != LR_REGNUM)
23631 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23632 reg_containing_return_addr = LR_REGNUM;
23635 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23638 if (crtl->calls_eh_return)
23639 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23641 /* Return to caller. */
23642 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
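/* In the worst case (TARGET_BACKTRACE with the return address still on the
   stack) the code above performs three pops -- LR, the ARM frame pointer
   and the saved SP -- recycled through whichever argument registers the
   return value leaves free, spilling r3 to IP and back when restore_a4 is
   needed. */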
23645 /* Scan INSN just before assembler is output for it.
23646 For Thumb-1, we track the status of the condition codes; this
23647 information is used in the cbranchsi4_insn pattern. */
23648 void
23649 thumb1_final_prescan_insn (rtx insn)
23651 if (flag_print_asm_name)
23652 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23653 INSN_ADDRESSES (INSN_UID (insn)));
23654 /* Don't overwrite the previous setter when we get to a cbranch. */
23655 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23657 enum attr_conds conds;
23659 if (cfun->machine->thumb1_cc_insn)
23661 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23662 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23663 CC_STATUS_INIT;
23665 conds = get_attr_conds (insn);
23666 if (conds == CONDS_SET)
23668 rtx set = single_set (insn);
23669 cfun->machine->thumb1_cc_insn = insn;
23670 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23671 cfun->machine->thumb1_cc_op1 = const0_rtx;
23672 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23673 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23675 rtx src1 = XEXP (SET_SRC (set), 1);
23676 if (src1 == const0_rtx)
23677 cfun->machine->thumb1_cc_mode = CCmode;
23679 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23681 /* Record the src register operand instead of dest because
23682 the cprop_hardreg pass propagates src. */
23683 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23686 else if (conds != CONDS_NOCOND)
23687 cfun->machine->thumb1_cc_insn = NULL_RTX;
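/* Return nonzero if VAL (truncated to 32 bits) is an 8-bit value shifted
   left by some amount, i.e. all of its set bits fall within eight
   contiguous bit positions; such constants can presumably be materialized
   with a MOV of an 8-bit immediate followed by a left shift. */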
23691 int
23692 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23694 unsigned HOST_WIDE_INT mask = 0xff;
23695 int i;
23697 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23698 if (val == 0) /* XXX */
23699 return 0;
23701 for (i = 0; i < 25; i++)
23702 if ((val & (mask << i)) == val)
23703 return 1;
23705 return 0;
23708 /* Returns nonzero if the current function contains,
23709 or might contain a far jump. */
23710 static int
23711 thumb_far_jump_used_p (void)
23713 rtx insn;
23715 /* This test is only important for leaf functions. */
23716 /* assert (!leaf_function_p ()); */
23718 /* If we have already decided that far jumps may be used,
23719 do not bother checking again, and always return true even if
23720 it turns out that they are not being used. Once we have made
23721 the decision that far jumps are present (and that hence the link
23722 register will be pushed onto the stack) we cannot go back on it. */
23723 if (cfun->machine->far_jump_used)
23724 return 1;
23726 /* If this function is not being called from the prologue/epilogue
23727 generation code then it must be being called from the
23728 INITIAL_ELIMINATION_OFFSET macro. */
23729 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23731 /* In this case we know that we are being asked about the elimination
23732 of the arg pointer register. If that register is not being used,
23733 then there are no arguments on the stack, and we do not have to
23734 worry that a far jump might force the prologue to push the link
23735 register, changing the stack offsets. In this case we can just
23736 return false, since the presence of far jumps in the function will
23737 not affect stack offsets.
23739 If the arg pointer is live (or if it was live, but has now been
23740 eliminated and so set to dead) then we do have to test to see if
23741 the function might contain a far jump. This test can lead to some
23742 false negatives, since before reload is completed the length of
23743 branch instructions is not known, so gcc defaults to returning their
23744 longest length, which in turn sets the far jump attribute to true.
23746 A false negative will not result in bad code being generated, but it
23747 will result in a needless push and pop of the link register. We
23748 hope that this does not occur too often.
23750 If we need doubleword stack alignment this could affect the other
23751 elimination offsets so we can't risk getting it wrong. */
23752 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23753 cfun->machine->arg_pointer_live = 1;
23754 else if (!cfun->machine->arg_pointer_live)
23755 return 0;
23758 /* Check to see if the function contains a branch
23759 insn with the far jump attribute set. */
23760 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23762 if (JUMP_P (insn)
23763 /* Ignore tablejump patterns. */
23764 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23765 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
23766 && get_attr_far_jump (insn) == FAR_JUMP_YES
23769 /* Record the fact that we have decided that
23770 the function does use far jumps. */
23771 cfun->machine->far_jump_used = 1;
23772 return 1;
23776 return 0;
23779 /* Return nonzero if FUNC must be entered in ARM mode. */
23780 int
23781 is_called_in_ARM_mode (tree func)
23783 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23785 /* Ignore the problem about functions whose address is taken. */
23786 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23787 return TRUE;
23789 #ifdef ARM_PE
23790 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23791 #else
23792 return FALSE;
23793 #endif
23796 /* Given the stack offsets and register mask in OFFSETS, decide how
23797 many additional registers to push instead of subtracting a constant
23798 from SP. For epilogues the principle is the same except we use pop.
23799 FOR_PROLOGUE indicates which we're generating. */
23800 static int
23801 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23803 HOST_WIDE_INT amount;
23804 unsigned long live_regs_mask = offsets->saved_regs_mask;
23805 /* Extract a mask of the ones we can give to the Thumb's push/pop
23806 instruction. */
23807 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23808 /* Then count how many other high registers will need to be pushed. */
23809 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23810 int n_free, reg_base, size;
23812 if (!for_prologue && frame_pointer_needed)
23813 amount = offsets->locals_base - offsets->saved_regs;
23814 else
23815 amount = offsets->outgoing_args - offsets->saved_regs;
23817 /* If the stack frame size is 512 exactly, we can save one load
23818 instruction, which should make this a win even when optimizing
23819 for speed. */
23820 if (!optimize_size && amount != 512)
23821 return 0;
23823 /* Can't do this if there are high registers to push. */
23824 if (high_regs_pushed != 0)
23825 return 0;
23827 /* Shouldn't do it in the prologue if no registers would normally
23828 be pushed at all. In the epilogue, also allow it if we'll have
23829 a pop insn for the PC. */
23830 if (l_mask == 0
23831 && (for_prologue
23832 || TARGET_BACKTRACE
23833 || (live_regs_mask & 1 << LR_REGNUM) == 0
23834 || TARGET_INTERWORK
23835 || crtl->args.pretend_args_size != 0))
23836 return 0;
23838 /* Don't do this if thumb_expand_prologue wants to emit instructions
23839 between the push and the stack frame allocation. */
23840 if (for_prologue
23841 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
23842 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
23843 return 0;
23845 reg_base = 0;
23846 n_free = 0;
23847 if (!for_prologue)
23849 size = arm_size_return_regs ();
23850 reg_base = ARM_NUM_INTS (size);
23851 live_regs_mask >>= reg_base;
23854 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
23855 && (for_prologue || call_used_regs[reg_base + n_free]))
23857 live_regs_mask >>= 1;
23858 n_free++;
23861 if (n_free == 0)
23862 return 0;
23863 gcc_assert (amount / 4 * 4 == amount);
23865 if (amount >= 512 && (amount - n_free * 4) < 512)
23866 return (amount - 508) / 4;
23867 if (amount <= n_free * 4)
23868 return amount / 4;
23869 return 0;
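/* Worked example: with a frame of exactly 512 bytes and one suitable free
   low register, this returns (512 - 508) / 4 = 1, so one extra register is
   pushed (and popped in the epilogue) and the remaining adjustment of 508
   bytes still fits in a single Thumb SP add/sub immediate. */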
23872 /* The bits which aren't usefully expanded as rtl. */
23873 const char *
23874 thumb1_unexpanded_epilogue (void)
23876 arm_stack_offsets *offsets;
23877 int regno;
23878 unsigned long live_regs_mask = 0;
23879 int high_regs_pushed = 0;
23880 int extra_pop;
23881 int had_to_push_lr;
23882 int size;
23884 if (cfun->machine->return_used_this_function != 0)
23885 return "";
23887 if (IS_NAKED (arm_current_func_type ()))
23888 return "";
23890 offsets = arm_get_frame_offsets ();
23891 live_regs_mask = offsets->saved_regs_mask;
23892 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23894 /* We can deduce the registers used from the function's return value.
23895 This is more reliable than examining df_regs_ever_live_p () because that
23896 will be set if the register is ever used in the function, not just if
23897 the register is used to hold a return value. */
23898 size = arm_size_return_regs ();
23900 extra_pop = thumb1_extra_regs_pushed (offsets, false);
23901 if (extra_pop > 0)
23903 unsigned long extra_mask = (1 << extra_pop) - 1;
23904 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
23907 /* The prolog may have pushed some high registers to use as
23908 work registers. e.g. the testsuite file:
23909 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
23910 compiles to produce:
23911 push {r4, r5, r6, r7, lr}
23912 mov r7, r9
23913 mov r6, r8
23914 push {r6, r7}
23915 as part of the prolog. We have to undo that pushing here. */
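/* For instance, undoing the sequence above: the stacked copies of r8 and
   r9 are popped into free low registers and then moved back up, e.g.
   "pop {r2, r3}" followed by "mov r8, r2" and "mov r9, r3" (which low
   registers are actually free depends on the return-value size below). */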
23917 if (high_regs_pushed)
23919 unsigned long mask = live_regs_mask & 0xff;
23920 int next_hi_reg;
23922 /* The available low registers depend on the size of the value we are
23923 returning. */
23924 if (size <= 12)
23925 mask |= 1 << 3;
23926 if (size <= 8)
23927 mask |= 1 << 2;
23929 if (mask == 0)
23930 /* Oh dear! We have no low registers into which we can pop
23931 high registers! */
23932 internal_error
23933 ("no low registers available for popping high registers");
23935 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
23936 if (live_regs_mask & (1 << next_hi_reg))
23937 break;
23939 while (high_regs_pushed)
23941 /* Find lo register(s) into which the high register(s) can
23942 be popped. */
23943 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
23945 if (mask & (1 << regno))
23946 high_regs_pushed--;
23947 if (high_regs_pushed == 0)
23948 break;
23951 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
23953 /* Pop the values into the low register(s). */
23954 thumb_pop (asm_out_file, mask);
23956 /* Move the value(s) into the high registers. */
23957 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
23959 if (mask & (1 << regno))
23961 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
23962 regno);
23964 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
23965 if (live_regs_mask & (1 << next_hi_reg))
23966 break;
23970 live_regs_mask &= ~0x0f00;
23973 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
23974 live_regs_mask &= 0xff;
23976 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
23978 /* Pop the return address into the PC. */
23979 if (had_to_push_lr)
23980 live_regs_mask |= 1 << PC_REGNUM;
23982 /* Either no argument registers were pushed or a backtrace
23983 structure was created which includes an adjusted stack
23984 pointer, so just pop everything. */
23985 if (live_regs_mask)
23986 thumb_pop (asm_out_file, live_regs_mask);
23988 /* We have either just popped the return address into the
23989 PC or it was kept in LR for the entire function.
23990 Note that thumb_pop has already called thumb_exit if the
23991 PC was in the list. */
23992 if (!had_to_push_lr)
23993 thumb_exit (asm_out_file, LR_REGNUM);
23995 else
23997 /* Pop everything but the return address. */
23998 if (live_regs_mask)
23999 thumb_pop (asm_out_file, live_regs_mask);
24001 if (had_to_push_lr)
24003 if (size > 12)
24005 /* We have no free low regs, so save one. */
24006 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24007 LAST_ARG_REGNUM);
24010 /* Get the return address into a temporary register. */
24011 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24013 if (size > 12)
24015 /* Move the return address to lr. */
24016 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24017 LAST_ARG_REGNUM);
24018 /* Restore the low register. */
24019 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24020 IP_REGNUM);
24021 regno = LR_REGNUM;
24023 else
24024 regno = LAST_ARG_REGNUM;
24026 else
24027 regno = LR_REGNUM;
24029 /* Remove the argument registers that were pushed onto the stack. */
24030 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24031 SP_REGNUM, SP_REGNUM,
24032 crtl->args.pretend_args_size);
24034 thumb_exit (asm_out_file, regno);
24037 return "";
24040 /* Functions to save and restore machine-specific function data. */
24041 static struct machine_function *
24042 arm_init_machine_status (void)
24044 struct machine_function *machine;
24045 machine = ggc_alloc_cleared_machine_function ();
24047 #if ARM_FT_UNKNOWN != 0
24048 machine->func_type = ARM_FT_UNKNOWN;
24049 #endif
24050 return machine;
24053 /* Return an RTX indicating where the return address to the
24054 calling function can be found. */
24055 rtx
24056 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24058 if (count != 0)
24059 return NULL_RTX;
24061 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24064 /* Do anything needed before RTL is emitted for each function. */
24065 void
24066 arm_init_expanders (void)
24068 /* Arrange to initialize and mark the machine per-function status. */
24069 init_machine_status = arm_init_machine_status;
24071 /* This is to stop the combine pass optimizing away the alignment
24072 adjustment of va_arg. */
24073 /* ??? It is claimed that this should not be necessary. */
24074 if (cfun)
24075 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24079 /* Like arm_compute_initial_elimination_offset. Simpler because there
24080 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24081 to point at the base of the local variables after static stack
24082 space for a function has been allocated. */
24084 HOST_WIDE_INT
24085 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24087 arm_stack_offsets *offsets;
24089 offsets = arm_get_frame_offsets ();
24091 switch (from)
24093 case ARG_POINTER_REGNUM:
24094 switch (to)
24096 case STACK_POINTER_REGNUM:
24097 return offsets->outgoing_args - offsets->saved_args;
24099 case FRAME_POINTER_REGNUM:
24100 return offsets->soft_frame - offsets->saved_args;
24102 case ARM_HARD_FRAME_POINTER_REGNUM:
24103 return offsets->saved_regs - offsets->saved_args;
24105 case THUMB_HARD_FRAME_POINTER_REGNUM:
24106 return offsets->locals_base - offsets->saved_args;
24108 default:
24109 gcc_unreachable ();
24111 break;
24113 case FRAME_POINTER_REGNUM:
24114 switch (to)
24116 case STACK_POINTER_REGNUM:
24117 return offsets->outgoing_args - offsets->soft_frame;
24119 case ARM_HARD_FRAME_POINTER_REGNUM:
24120 return offsets->saved_regs - offsets->soft_frame;
24122 case THUMB_HARD_FRAME_POINTER_REGNUM:
24123 return offsets->locals_base - offsets->soft_frame;
24125 default:
24126 gcc_unreachable ();
24128 break;
24130 default:
24131 gcc_unreachable ();
24135 /* Generate the function's prologue. */
24137 void
24138 thumb1_expand_prologue (void)
24140 rtx insn;
24142 HOST_WIDE_INT amount;
24143 arm_stack_offsets *offsets;
24144 unsigned long func_type;
24145 int regno;
24146 unsigned long live_regs_mask;
24147 unsigned long l_mask;
24148 unsigned high_regs_pushed = 0;
24150 func_type = arm_current_func_type ();
24152 /* Naked functions don't have prologues. */
24153 if (IS_NAKED (func_type))
24154 return;
24156 if (IS_INTERRUPT (func_type))
24158 error ("interrupt Service Routines cannot be coded in Thumb mode");
24159 return;
24162 if (is_called_in_ARM_mode (current_function_decl))
24163 emit_insn (gen_prologue_thumb1_interwork ());
24165 offsets = arm_get_frame_offsets ();
24166 live_regs_mask = offsets->saved_regs_mask;
24168 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24169 l_mask = live_regs_mask & 0x40ff;
24170 /* Then count how many other high registers will need to be pushed. */
24171 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24173 if (crtl->args.pretend_args_size)
24175 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24177 if (cfun->machine->uses_anonymous_args)
24179 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24180 unsigned long mask;
24182 mask = 1ul << (LAST_ARG_REGNUM + 1);
24183 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24185 insn = thumb1_emit_multi_reg_push (mask, 0);
24187 else
24189 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24190 stack_pointer_rtx, x));
24192 RTX_FRAME_RELATED_P (insn) = 1;
24195 if (TARGET_BACKTRACE)
24197 HOST_WIDE_INT offset = 0;
24198 unsigned work_register;
24199 rtx work_reg, x, arm_hfp_rtx;
24201 /* We have been asked to create a stack backtrace structure.
24202 The code looks like this:
24204 0 .align 2
24205 0 func:
24206 0 sub SP, #16 Reserve space for 4 registers.
24207 2 push {R7} Push low registers.
24208 4 add R7, SP, #20 Get the stack pointer before the push.
24209 6 str R7, [SP, #8] Store the stack pointer
24210 (before reserving the space).
24211 8 mov R7, PC Get hold of the start of this code + 12.
24212 10 str R7, [SP, #16] Store it.
24213 12 mov R7, FP Get hold of the current frame pointer.
24214 14 str R7, [SP, #4] Store it.
24215 16 mov R7, LR Get hold of the current return address.
24216 18 str R7, [SP, #12] Store it.
24217 20 add R7, SP, #16 Point at the start of the
24218 backtrace structure.
24219 22 mov FP, R7 Put this value into the frame pointer. */
24221 work_register = thumb_find_work_register (live_regs_mask);
24222 work_reg = gen_rtx_REG (SImode, work_register);
24223 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24225 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24226 stack_pointer_rtx, GEN_INT (-16)));
24227 RTX_FRAME_RELATED_P (insn) = 1;
24229 if (l_mask)
24231 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24232 RTX_FRAME_RELATED_P (insn) = 1;
24234 offset = bit_count (l_mask) * UNITS_PER_WORD;
24237 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24238 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24240 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24241 x = gen_frame_mem (SImode, x);
24242 emit_move_insn (x, work_reg);
24244 /* Make sure that the instruction fetching the PC is in the right place
24245 to calculate "start of backtrace creation code + 12". */
24246 /* ??? The stores using the common WORK_REG ought to be enough to
24247 prevent the scheduler from doing anything weird. Failing that
24248 we could always move all of the following into an UNSPEC_VOLATILE. */
24249 if (l_mask)
24251 x = gen_rtx_REG (SImode, PC_REGNUM);
24252 emit_move_insn (work_reg, x);
24254 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24255 x = gen_frame_mem (SImode, x);
24256 emit_move_insn (x, work_reg);
24258 emit_move_insn (work_reg, arm_hfp_rtx);
24260 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24261 x = gen_frame_mem (SImode, x);
24262 emit_move_insn (x, work_reg);
24264 else
24266 emit_move_insn (work_reg, arm_hfp_rtx);
24268 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24269 x = gen_frame_mem (SImode, x);
24270 emit_move_insn (x, work_reg);
24272 x = gen_rtx_REG (SImode, PC_REGNUM);
24273 emit_move_insn (work_reg, x);
24275 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24276 x = gen_frame_mem (SImode, x);
24277 emit_move_insn (x, work_reg);
24280 x = gen_rtx_REG (SImode, LR_REGNUM);
24281 emit_move_insn (work_reg, x);
24283 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24284 x = gen_frame_mem (SImode, x);
24285 emit_move_insn (x, work_reg);
24287 x = GEN_INT (offset + 12);
24288 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24290 emit_move_insn (arm_hfp_rtx, work_reg);
24292 /* Optimization: If we are not pushing any low registers but we are going
24293 to push some high registers then delay our first push. This will just
24294 be a push of LR and we can combine it with the push of the first high
24295 register. */
24296 else if ((l_mask & 0xff) != 0
24297 || (high_regs_pushed == 0 && l_mask))
24299 unsigned long mask = l_mask;
24300 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24301 insn = thumb1_emit_multi_reg_push (mask, mask);
24302 RTX_FRAME_RELATED_P (insn) = 1;
24305 if (high_regs_pushed)
24307 unsigned pushable_regs;
24308 unsigned next_hi_reg;
24309 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24310 : crtl->args.info.nregs;
24311 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24313 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24314 if (live_regs_mask & (1 << next_hi_reg))
24315 break;
24317 /* Here we need to mask out registers used for passing arguments
24318 even if they could otherwise be pushed; using them to stash the high
24319 registers would clobber the arguments themselves. */
24320 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24322 if (pushable_regs == 0)
24323 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24325 while (high_regs_pushed > 0)
24327 unsigned long real_regs_mask = 0;
24329 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24331 if (pushable_regs & (1 << regno))
24333 emit_move_insn (gen_rtx_REG (SImode, regno),
24334 gen_rtx_REG (SImode, next_hi_reg));
24336 high_regs_pushed --;
24337 real_regs_mask |= (1 << next_hi_reg);
24339 if (high_regs_pushed)
24341 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24342 next_hi_reg --)
24343 if (live_regs_mask & (1 << next_hi_reg))
24344 break;
24346 else
24348 pushable_regs &= ~((1 << regno) - 1);
24349 break;
24354 /* If we had to find a work register and we have not yet
24355 saved the LR then add it to the list of regs to push. */
24356 if (l_mask == (1 << LR_REGNUM))
24358 pushable_regs |= l_mask;
24359 real_regs_mask |= l_mask;
24360 l_mask = 0;
24363 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24364 RTX_FRAME_RELATED_P (insn) = 1;
24368 /* Load the pic register before setting the frame pointer,
24369 so we can use r7 as a temporary work register. */
24370 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24371 arm_load_pic_register (live_regs_mask);
24373 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24374 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24375 stack_pointer_rtx);
24377 if (flag_stack_usage_info)
24378 current_function_static_stack_size
24379 = offsets->outgoing_args - offsets->saved_args;
24381 amount = offsets->outgoing_args - offsets->saved_regs;
24382 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24383 if (amount)
24385 if (amount < 512)
24387 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24388 GEN_INT (- amount)));
24389 RTX_FRAME_RELATED_P (insn) = 1;
24391 else
24393 rtx reg, dwarf;
24395 /* The stack decrement is too big for an immediate value in a single
24396 insn. In theory we could issue multiple subtracts, but after
24397 three of them it becomes more space efficient to place the full
24398 value in the constant pool and load into a register. (Also the
24399 ARM debugger really likes to see only one stack decrement per
24400 function). So instead we look for a scratch register into which
24401 we can load the decrement, and then we subtract this from the
24402 stack pointer. Unfortunately on the thumb the only available
24403 scratch registers are the argument registers, and we cannot use
24404 these as they may hold arguments to the function. Instead we
24405 attempt to locate a call preserved register which is used by this
24406 function. If we can find one, then we know that it will have
24407 been pushed at the start of the prologue and so we can corrupt
24408 it now. */
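/* Sketch of the resulting sequence for a 1024-byte decrement, assuming r4
   is a call-saved register already pushed by the prologue:
       ldr  r4, .Lconst    @ .Lconst: .word -1024, from the constant pool
       add  sp, sp, r4
   The REG_FRAME_RELATED_EXPR note added below describes this to the
   unwinder as a plain constant decrement of the stack pointer. */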
24409 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24410 if (live_regs_mask & (1 << regno))
24411 break;
24413 gcc_assert(regno <= LAST_LO_REGNUM);
24415 reg = gen_rtx_REG (SImode, regno);
24417 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24419 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24420 stack_pointer_rtx, reg));
24422 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24423 plus_constant (Pmode, stack_pointer_rtx,
24424 -amount));
24425 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24426 RTX_FRAME_RELATED_P (insn) = 1;
24430 if (frame_pointer_needed)
24431 thumb_set_frame_pointer (offsets);
24433 /* If we are profiling, make sure no instructions are scheduled before
24434 the call to mcount. Similarly if the user has requested no
24435 scheduling in the prolog. Similarly if we want non-call exceptions
24436 using the EABI unwinder, to prevent faulting instructions from being
24437 swapped with a stack adjustment. */
24438 if (crtl->profile || !TARGET_SCHED_PROLOG
24439 || (arm_except_unwind_info (&global_options) == UI_TARGET
24440 && cfun->can_throw_non_call_exceptions))
24441 emit_insn (gen_blockage ());
24443 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24444 if (live_regs_mask & 0xff)
24445 cfun->machine->lr_save_eliminated = 0;
24448 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
24449 single POP instruction can be generated. LR should be replaced by PC. All
24450 the checks required are already done by USE_RETURN_INSN (). Hence,
24451 all we really need to check here is whether a single register or
24452 multiple registers are to be popped. */
24453 void
24454 thumb2_expand_return (bool simple_return)
24456 int i, num_regs;
24457 unsigned long saved_regs_mask;
24458 arm_stack_offsets *offsets;
24460 offsets = arm_get_frame_offsets ();
24461 saved_regs_mask = offsets->saved_regs_mask;
24463 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24464 if (saved_regs_mask & (1 << i))
24465 num_regs++;
24467 if (!simple_return && saved_regs_mask)
24469 if (num_regs == 1)
24471 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24472 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24473 rtx addr = gen_rtx_MEM (SImode,
24474 gen_rtx_POST_INC (SImode,
24475 stack_pointer_rtx));
24476 set_mem_alias_set (addr, get_frame_alias_set ());
24477 XVECEXP (par, 0, 0) = ret_rtx;
24478 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24479 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24480 emit_jump_insn (par);
24482 else
24484 saved_regs_mask &= ~ (1 << LR_REGNUM);
24485 saved_regs_mask |= (1 << PC_REGNUM);
24486 arm_emit_multi_reg_pop (saved_regs_mask);
24489 else
24491 emit_jump_insn (simple_return_rtx);
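/* So a function that saved only LR reloads the return address straight
   into the PC from a post-incremented SP load (the two-element PARALLEL
   above), while one that saved several registers swaps LR for PC in the
   mask and emits a single multi-register pop; otherwise a plain
   simple_return is emitted. */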
24495 void
24496 thumb1_expand_epilogue (void)
24498 HOST_WIDE_INT amount;
24499 arm_stack_offsets *offsets;
24500 int regno;
24502 /* Naked functions don't have epilogues. */
24503 if (IS_NAKED (arm_current_func_type ()))
24504 return;
24506 offsets = arm_get_frame_offsets ();
24507 amount = offsets->outgoing_args - offsets->saved_regs;
24509 if (frame_pointer_needed)
24511 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24512 amount = offsets->locals_base - offsets->saved_regs;
24514 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24516 gcc_assert (amount >= 0);
24517 if (amount)
24519 emit_insn (gen_blockage ());
24521 if (amount < 512)
24522 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24523 GEN_INT (amount)));
24524 else
24526 /* r3 is always free in the epilogue. */
24527 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24529 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24530 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24534 /* Emit a USE (stack_pointer_rtx), so that
24535 the stack adjustment will not be deleted. */
24536 emit_insn (gen_force_register_use (stack_pointer_rtx));
24538 if (crtl->profile || !TARGET_SCHED_PROLOG)
24539 emit_insn (gen_blockage ());
24541 /* Emit a clobber for each insn that will be restored in the epilogue,
24542 so that flow2 will get register lifetimes correct. */
24543 for (regno = 0; regno < 13; regno++)
24544 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24545 emit_clobber (gen_rtx_REG (SImode, regno));
24547 if (! df_regs_ever_live_p (LR_REGNUM))
24548 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24551 /* Epilogue code for APCS frame. */
24552 static void
24553 arm_expand_epilogue_apcs_frame (bool really_return)
24555 unsigned long func_type;
24556 unsigned long saved_regs_mask;
24557 int num_regs = 0;
24558 int i;
24559 int floats_from_frame = 0;
24560 arm_stack_offsets *offsets;
24562 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24563 func_type = arm_current_func_type ();
24565 /* Get frame offsets for ARM. */
24566 offsets = arm_get_frame_offsets ();
24567 saved_regs_mask = offsets->saved_regs_mask;
24569 /* Find the offset of the floating-point save area in the frame. */
24570 floats_from_frame = offsets->saved_args - offsets->frame;
24572 /* Compute how many core registers are saved and how far away the floats are. */
24573 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24574 if (saved_regs_mask & (1 << i))
24576 num_regs++;
24577 floats_from_frame += 4;
24580 if (TARGET_HARD_FLOAT && TARGET_VFP)
24582 int start_reg;
24583 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24585 /* The offset is from IP_REGNUM. */
24586 int saved_size = arm_get_vfp_saved_size ();
24587 if (saved_size > 0)
24589 rtx insn;
24590 floats_from_frame += saved_size;
24591 insn = emit_insn (gen_addsi3 (ip_rtx,
24592 hard_frame_pointer_rtx,
24593 GEN_INT (-floats_from_frame)));
24594 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24595 ip_rtx, hard_frame_pointer_rtx);
24598 /* Generate VFP register multi-pop. */
24599 start_reg = FIRST_VFP_REGNUM;
24601 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24602 /* Look for a case where a reg does not need restoring. */
24603 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24604 && (!df_regs_ever_live_p (i + 1)
24605 || call_used_regs[i + 1]))
24607 if (start_reg != i)
24608 arm_emit_vfp_multi_reg_pop (start_reg,
24609 (i - start_reg) / 2,
24610 gen_rtx_REG (SImode,
24611 IP_REGNUM));
24612 start_reg = i + 2;
24615 /* Restore the remaining regs that we have discovered (or possibly
24616 even all of them, if the conditional in the for loop never
24617 fired). */
24618 if (start_reg != i)
24619 arm_emit_vfp_multi_reg_pop (start_reg,
24620 (i - start_reg) / 2,
24621 gen_rtx_REG (SImode, IP_REGNUM));
24624 if (TARGET_IWMMXT)
24626 /* The frame pointer is guaranteed to be non-double-word aligned, as
24627 it is set to double-word-aligned old_stack_pointer - 4. */
24628 rtx insn;
24629 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24631 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24632 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24634 rtx addr = gen_frame_mem (V2SImode,
24635 plus_constant (Pmode, hard_frame_pointer_rtx,
24636 - lrm_count * 4));
24637 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24638 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24639 gen_rtx_REG (V2SImode, i),
24640 NULL_RTX);
24641 lrm_count += 2;
24645 /* saved_regs_mask should contain IP, which holds the old stack pointer
24646 from the time the activation record was created. Since SP and IP are
24647 adjacent registers, we can restore the value directly into SP. */
24648 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24649 saved_regs_mask &= ~(1 << IP_REGNUM);
24650 saved_regs_mask |= (1 << SP_REGNUM);
24652 /* There are two registers left in saved_regs_mask - LR and PC. We
24653 only need to restore LR (the return address), but to
24654 save time we can load it directly into PC, unless we need a
24655 special function exit sequence, or we are not really returning. */
24656 if (really_return
24657 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24658 && !crtl->calls_eh_return)
24659 /* Delete LR from the register mask, so that LR on
24660 the stack is loaded into the PC in the register mask. */
24661 saved_regs_mask &= ~(1 << LR_REGNUM);
24662 else
24663 saved_regs_mask &= ~(1 << PC_REGNUM);
24665 num_regs = bit_count (saved_regs_mask);
24666 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24668 rtx insn;
24669 emit_insn (gen_blockage ());
24670 /* Unwind the stack to just below the saved registers. */
24671 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24672 hard_frame_pointer_rtx,
24673 GEN_INT (- 4 * num_regs)));
24675 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24676 stack_pointer_rtx, hard_frame_pointer_rtx);
24679 arm_emit_multi_reg_pop (saved_regs_mask);
24681 if (IS_INTERRUPT (func_type))
24683 /* Interrupt handlers will have pushed the
24684 IP onto the stack, so restore it now. */
24685 rtx insn;
24686 rtx addr = gen_rtx_MEM (SImode,
24687 gen_rtx_POST_INC (SImode,
24688 stack_pointer_rtx));
24689 set_mem_alias_set (addr, get_frame_alias_set ());
24690 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24691 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24692 gen_rtx_REG (SImode, IP_REGNUM),
24693 NULL_RTX);
24696 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24697 return;
24699 if (crtl->calls_eh_return)
24700 emit_insn (gen_addsi3 (stack_pointer_rtx,
24701 stack_pointer_rtx,
24702 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24704 if (IS_STACKALIGN (func_type))
24705 /* Restore the original stack pointer. Before the prologue, the stack was
24706 realigned and the original stack pointer was saved in r0. For details,
24707 see comment in arm_expand_prologue. */
24708 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24710 emit_jump_insn (simple_return_rtx);
24713 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24714 function is not a sibcall. */
24715 void
24716 arm_expand_epilogue (bool really_return)
24718 unsigned long func_type;
24719 unsigned long saved_regs_mask;
24720 int num_regs = 0;
24721 int i;
24722 int amount;
24723 arm_stack_offsets *offsets;
24725 func_type = arm_current_func_type ();
24727 /* Naked functions don't have an epilogue, so generate the return pattern and
24728 let output_return_instruction take care of any instruction emission. */
24729 if (IS_NAKED (func_type)
24730 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24732 if (really_return)
24733 emit_jump_insn (simple_return_rtx);
24734 return;
24737 /* If we are throwing an exception, then we really must be doing a
24738 return, so we can't tail-call. */
24739 gcc_assert (!crtl->calls_eh_return || really_return);
24741 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24743 arm_expand_epilogue_apcs_frame (really_return);
24744 return;
24747 /* Get frame offsets for ARM. */
24748 offsets = arm_get_frame_offsets ();
24749 saved_regs_mask = offsets->saved_regs_mask;
24750 num_regs = bit_count (saved_regs_mask);
24752 if (frame_pointer_needed)
24754 rtx insn;
24755 /* Restore stack pointer if necessary. */
24756 if (TARGET_ARM)
24758 /* In ARM mode, the frame pointer points to the first saved register.
24759 Restore the stack pointer to the last saved register. */
24760 amount = offsets->frame - offsets->saved_regs;
24762 /* Force out any pending memory operations that reference stacked data
24763 before stack de-allocation occurs. */
24764 emit_insn (gen_blockage ());
24765 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24766 hard_frame_pointer_rtx,
24767 GEN_INT (amount)));
24768 arm_add_cfa_adjust_cfa_note (insn, amount,
24769 stack_pointer_rtx,
24770 hard_frame_pointer_rtx);
24772 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24773 deleted. */
24774 emit_insn (gen_force_register_use (stack_pointer_rtx));
24776 else
24778 /* In Thumb-2 mode, the frame pointer points to the last saved
24779 register. */
24780 amount = offsets->locals_base - offsets->saved_regs;
24781 if (amount)
24783 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24784 hard_frame_pointer_rtx,
24785 GEN_INT (amount)));
24786 arm_add_cfa_adjust_cfa_note (insn, amount,
24787 hard_frame_pointer_rtx,
24788 hard_frame_pointer_rtx);
24791 /* Force out any pending memory operations that reference stacked data
24792 before stack de-allocation occurs. */
24793 emit_insn (gen_blockage ());
24794 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24795 hard_frame_pointer_rtx));
24796 arm_add_cfa_adjust_cfa_note (insn, 0,
24797 stack_pointer_rtx,
24798 hard_frame_pointer_rtx);
24799 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24800 deleted. */
24801 emit_insn (gen_force_register_use (stack_pointer_rtx));
24804 else
24806 /* Pop off outgoing args and local frame to adjust stack pointer to
24807 last saved register. */
24808 amount = offsets->outgoing_args - offsets->saved_regs;
24809 if (amount)
24811 rtx tmp;
24812 /* Force out any pending memory operations that reference stacked data
24813 before stack de-allocation occurs. */
24814 emit_insn (gen_blockage ());
24815 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24816 stack_pointer_rtx,
24817 GEN_INT (amount)));
24818 arm_add_cfa_adjust_cfa_note (tmp, amount,
24819 stack_pointer_rtx, stack_pointer_rtx);
24820 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24821 not deleted. */
24822 emit_insn (gen_force_register_use (stack_pointer_rtx));
24826 if (TARGET_HARD_FLOAT && TARGET_VFP)
24828 /* Generate VFP register multi-pop. */
24829 int end_reg = LAST_VFP_REGNUM + 1;
24831 /* Scan the registers in reverse order. We need to match
24832 any groupings made in the prologue and generate matching
24833 vldm operations. We must match the groups because, unlike pop,
24834 vldm can only handle consecutive registers. */
24835 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
24836 /* Look for a case where a reg does not need restoring. */
24837 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24838 && (!df_regs_ever_live_p (i + 1)
24839 || call_used_regs[i + 1]))
24841 /* Restore the regs discovered so far (from reg+2 to
24842 end_reg). */
24843 if (end_reg > i + 2)
24844 arm_emit_vfp_multi_reg_pop (i + 2,
24845 (end_reg - (i + 2)) / 2,
24846 stack_pointer_rtx);
24847 end_reg = i;
24850 /* Restore the remaining regs that we have discovered (or possibly
24851 even all of them, if the conditional in the for loop never
24852 fired). */
24853 if (end_reg > i + 2)
24854 arm_emit_vfp_multi_reg_pop (i + 2,
24855 (end_reg - (i + 2)) / 2,
24856 stack_pointer_rtx);
24859 if (TARGET_IWMMXT)
24860 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
24861 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24863 rtx insn;
24864 rtx addr = gen_rtx_MEM (V2SImode,
24865 gen_rtx_POST_INC (SImode,
24866 stack_pointer_rtx));
24867 set_mem_alias_set (addr, get_frame_alias_set ());
24868 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24869 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24870 gen_rtx_REG (V2SImode, i),
24871 NULL_RTX);
24872 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24873 stack_pointer_rtx, stack_pointer_rtx);
24876 if (saved_regs_mask)
24878 rtx insn;
24879 bool return_in_pc = false;
24881 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
24882 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
24883 && !IS_STACKALIGN (func_type)
24884 && really_return
24885 && crtl->args.pretend_args_size == 0
24886 && saved_regs_mask & (1 << LR_REGNUM)
24887 && !crtl->calls_eh_return)
24889 saved_regs_mask &= ~(1 << LR_REGNUM);
24890 saved_regs_mask |= (1 << PC_REGNUM);
24891 return_in_pc = true;
24894 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
24896 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24897 if (saved_regs_mask & (1 << i))
24899 rtx addr = gen_rtx_MEM (SImode,
24900 gen_rtx_POST_INC (SImode,
24901 stack_pointer_rtx));
24902 set_mem_alias_set (addr, get_frame_alias_set ());
24904 if (i == PC_REGNUM)
24906 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24907 XVECEXP (insn, 0, 0) = ret_rtx;
24908 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
24909 gen_rtx_REG (SImode, i),
24910 addr);
24911 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
24912 insn = emit_jump_insn (insn);
24914 else
24916 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
24917 addr));
24918 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24919 gen_rtx_REG (SImode, i),
24920 NULL_RTX);
24921 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
24922 stack_pointer_rtx,
24923 stack_pointer_rtx);
24927 else
24929 if (TARGET_LDRD
24930 && current_tune->prefer_ldrd_strd
24931 && !optimize_function_for_size_p (cfun))
24933 if (TARGET_THUMB2)
24934 thumb2_emit_ldrd_pop (saved_regs_mask);
24935 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
24936 arm_emit_ldrd_pop (saved_regs_mask);
24937 else
24938 arm_emit_multi_reg_pop (saved_regs_mask);
24940 else
24941 arm_emit_multi_reg_pop (saved_regs_mask);
24944 if (return_in_pc == true)
24945 return;
24948 if (crtl->args.pretend_args_size)
24950 int i, j;
24951 rtx dwarf = NULL_RTX;
24952 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24953 stack_pointer_rtx,
24954 GEN_INT (crtl->args.pretend_args_size)));
24956 RTX_FRAME_RELATED_P (tmp) = 1;
24958 if (cfun->machine->uses_anonymous_args)
24960 /* Restore pretend args. See arm_expand_prologue for how the pretend
24961 args are saved on the stack. */
24962 int num_regs = crtl->args.pretend_args_size / 4;
24963 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
24964 for (j = 0, i = 0; j < num_regs; i++)
24965 if (saved_regs_mask & (1 << i))
24967 rtx reg = gen_rtx_REG (SImode, i);
24968 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
24969 j++;
24971 REG_NOTES (tmp) = dwarf;
24973 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
24974 stack_pointer_rtx, stack_pointer_rtx);
24977 if (!really_return)
24978 return;
24980 if (crtl->calls_eh_return)
24981 emit_insn (gen_addsi3 (stack_pointer_rtx,
24982 stack_pointer_rtx,
24983 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24985 if (IS_STACKALIGN (func_type))
24986 /* Restore the original stack pointer. Before the prologue, the stack was
24987 realigned and the original stack pointer was saved in r0. For details,
24988 see comment in arm_expand_prologue. */
24989 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24991 emit_jump_insn (simple_return_rtx);
24994 /* Implementation of insn prologue_thumb1_interwork. This is the first
24995 "instruction" of a function called in ARM mode. Swap to thumb mode. */
24997 const char *
24998 thumb1_output_interwork (void)
25000 const char * name;
25001 FILE *f = asm_out_file;
25003 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25004 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25005 == SYMBOL_REF);
25006 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25008 /* Generate code sequence to switch us into Thumb mode. */
25009 /* The .code 32 directive has already been emitted by
25010 ASM_DECLARE_FUNCTION_NAME. */
25011 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25012 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25014 /* Generate a label, so that the debugger will notice the
25015 change in instruction sets. This label is also used by
25016 the assembler to bypass the ARM code when this function
25017 is called from a Thumb encoded function elsewhere in the
25018 same file. Hence the definition of STUB_NAME here must
25019 agree with the definition in gas/config/tc-arm.c. */
25021 #define STUB_NAME ".real_start_of"
25023 fprintf (f, "\t.code\t16\n");
25024 #ifdef ARM_PE
25025 if (arm_dllexport_name_p (name))
25026 name = arm_strip_name_encoding (name);
25027 #endif
25028 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25029 fprintf (f, "\t.thumb_func\n");
25030 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25032 return "";
25035 /* Handle the case of a double word load into a low register from
25036 a computed memory address. The computed address may involve a
25037 register which is overwritten by the load. */
25038 const char *
25039 thumb_load_double_from_address (rtx *operands)
25041 rtx addr;
25042 rtx base;
25043 rtx offset;
25044 rtx arg1;
25045 rtx arg2;
25047 gcc_assert (REG_P (operands[0]));
25048 gcc_assert (MEM_P (operands[1]));
25050 /* Get the memory address. */
25051 addr = XEXP (operands[1], 0);
25053 /* Work out how the memory address is computed. */
25054 switch (GET_CODE (addr))
25056 case REG:
25057 operands[2] = adjust_address (operands[1], SImode, 4);
25059 if (REGNO (operands[0]) == REGNO (addr))
25061 output_asm_insn ("ldr\t%H0, %2", operands);
25062 output_asm_insn ("ldr\t%0, %1", operands);
25064 else
25066 output_asm_insn ("ldr\t%0, %1", operands);
25067 output_asm_insn ("ldr\t%H0, %2", operands);
25069 break;
25071 case CONST:
25072 /* Compute <address> + 4 for the high order load. */
25073 operands[2] = adjust_address (operands[1], SImode, 4);
25075 output_asm_insn ("ldr\t%0, %1", operands);
25076 output_asm_insn ("ldr\t%H0, %2", operands);
25077 break;
25079 case PLUS:
25080 arg1 = XEXP (addr, 0);
25081 arg2 = XEXP (addr, 1);
25083 if (CONSTANT_P (arg1))
25084 base = arg2, offset = arg1;
25085 else
25086 base = arg1, offset = arg2;
25088 gcc_assert (REG_P (base));
25090 /* Catch the case of <address> = <reg> + <reg> */
25091 if (REG_P (offset))
25093 int reg_offset = REGNO (offset);
25094 int reg_base = REGNO (base);
25095 int reg_dest = REGNO (operands[0]);
25097 /* Add the base and offset registers together into the
25098 higher destination register. */
25099 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25100 reg_dest + 1, reg_base, reg_offset);
25102 /* Load the lower destination register from the address in
25103 the higher destination register. */
25104 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25105 reg_dest, reg_dest + 1);
25107 /* Load the higher destination register from its own address
25108 plus 4. */
25109 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25110 reg_dest + 1, reg_dest + 1);
25112 else
25114 /* Compute <address> + 4 for the high order load. */
25115 operands[2] = adjust_address (operands[1], SImode, 4);
25117 /* If the computed address is held in the low order register
25118 then load the high order register first, otherwise always
25119 load the low order register first. */
25120 if (REGNO (operands[0]) == REGNO (base))
25122 output_asm_insn ("ldr\t%H0, %2", operands);
25123 output_asm_insn ("ldr\t%0, %1", operands);
25125 else
25127 output_asm_insn ("ldr\t%0, %1", operands);
25128 output_asm_insn ("ldr\t%H0, %2", operands);
25131 break;
25133 case LABEL_REF:
25134 /* With no registers to worry about we can just load the value
25135 directly. */
25136 operands[2] = adjust_address (operands[1], SImode, 4);
25138 output_asm_insn ("ldr\t%H0, %2", operands);
25139 output_asm_insn ("ldr\t%0, %1", operands);
25140 break;
25142 default:
25143 gcc_unreachable ();
25146 return "";
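/* Output a block move of N (2 or 3) words using ldmia/stmia with
   post-increment of the source and destination pointers.  The scratch
   registers in operands[4..6] are first sorted into ascending order,
   since ldm/stm register lists must be ascending.  For N == 2 this emits
   roughly (register names illustrative):
        ldmia   r1!, {r4, r5}
        stmia   r0!, {r4, r5}  */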
25149 const char *
25150 thumb_output_move_mem_multiple (int n, rtx *operands)
25152 rtx tmp;
25154 switch (n)
25156 case 2:
25157 if (REGNO (operands[4]) > REGNO (operands[5]))
25159 tmp = operands[4];
25160 operands[4] = operands[5];
25161 operands[5] = tmp;
25163 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25164 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25165 break;
25167 case 3:
25168 if (REGNO (operands[4]) > REGNO (operands[5]))
25170 tmp = operands[4];
25171 operands[4] = operands[5];
25172 operands[5] = tmp;
25174 if (REGNO (operands[5]) > REGNO (operands[6]))
25176 tmp = operands[5];
25177 operands[5] = operands[6];
25178 operands[6] = tmp;
25180 if (REGNO (operands[4]) > REGNO (operands[5]))
25182 tmp = operands[4];
25183 operands[4] = operands[5];
25184 operands[5] = tmp;
25187 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25188 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25189 break;
25191 default:
25192 gcc_unreachable ();
25195 return "";
25198 /* Output a call-via instruction for thumb state. */
25199 const char *
25200 thumb_call_via_reg (rtx reg)
25202 int regno = REGNO (reg);
25203 rtx *labelp;
25205 gcc_assert (regno < LR_REGNUM);
25207 /* If we are in the normal text section we can use a single instance
25208 per compilation unit. If we are doing function sections, then we need
25209 an entry per section, since we can't rely on reachability. */
25210 if (in_section == text_section)
25212 thumb_call_reg_needed = 1;
25214 if (thumb_call_via_label[regno] == NULL)
25215 thumb_call_via_label[regno] = gen_label_rtx ();
25216 labelp = thumb_call_via_label + regno;
25218 else
25220 if (cfun->machine->call_via[regno] == NULL)
25221 cfun->machine->call_via[regno] = gen_label_rtx ();
25222 labelp = cfun->machine->call_via + regno;
25225 output_asm_insn ("bl\t%a0", labelp);
25226 return "";
25229 /* Routines for generating rtl. */
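/* Expand a memory block copy of INTVAL (operands[2]) bytes for Thumb:
   peel off 12- and 8-byte chunks via the movmem12b/movmem8b patterns,
   then copy any remaining tail with word, half-word and byte moves.  */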
25230 void
25231 thumb_expand_movmemqi (rtx *operands)
25233 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25234 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25235 HOST_WIDE_INT len = INTVAL (operands[2]);
25236 HOST_WIDE_INT offset = 0;
25238 while (len >= 12)
25240 emit_insn (gen_movmem12b (out, in, out, in));
25241 len -= 12;
25244 if (len >= 8)
25246 emit_insn (gen_movmem8b (out, in, out, in));
25247 len -= 8;
25250 if (len >= 4)
25252 rtx reg = gen_reg_rtx (SImode);
25253 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25254 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25255 len -= 4;
25256 offset += 4;
25259 if (len >= 2)
25261 rtx reg = gen_reg_rtx (HImode);
25262 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25263 plus_constant (Pmode, in,
25264 offset))));
25265 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25266 offset)),
25267 reg));
25268 len -= 2;
25269 offset += 2;
25272 if (len)
25274 rtx reg = gen_reg_rtx (QImode);
25275 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25276 plus_constant (Pmode, in,
25277 offset))));
25278 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25279 offset)),
25280 reg));
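/* Handle storing a half-word to memory during reload by deferring to the
   thumb_movhi_clobber pattern.  */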
25284 void
25285 thumb_reload_out_hi (rtx *operands)
25287 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25290 /* Handle reading a half-word from memory during reload. */
25291 void
25292 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25294 gcc_unreachable ();
25297 /* Return the length of a function name prefix
25298 that starts with the character 'c'. */
25299 static int
25300 arm_get_strip_length (int c)
25302 switch (c)
25304 ARM_NAME_ENCODING_LENGTHS
25305 default: return 0;
25309 /* Return a pointer to a function's name with any
25310 and all prefix encodings stripped from it. */
25311 const char *
25312 arm_strip_name_encoding (const char *name)
25314 int skip;
25316 while ((skip = arm_get_strip_length (* name)))
25317 name += skip;
25319 return name;
25322 /* If there is a '*' anywhere in the name's prefix, then
25323 emit the stripped name verbatim, otherwise prepend an
25324 underscore if leading underscores are being used. */
25325 void
25326 arm_asm_output_labelref (FILE *stream, const char *name)
25328 int skip;
25329 int verbatim = 0;
25331 while ((skip = arm_get_strip_length (* name)))
25333 verbatim |= (*name == '*');
25334 name += skip;
25337 if (verbatim)
25338 fputs (name, stream);
25339 else
25340 asm_fprintf (stream, "%U%s", name);
25343 /* This function is used to emit an EABI tag and its associated value.
25344 We emit the numerical value of the tag in case the assembler does not
25345 support textual tags (e.g. gas prior to 2.20). If requested we include
25346 the tag name in a comment so that anyone reading the assembler output
25347 will know which tag is being set.
25349 This function is not static because arm-c.c needs it too. */
25351 void
25352 arm_emit_eabi_attribute (const char *name, int num, int val)
25354 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25355 if (flag_verbose_asm || flag_debug_asm)
25356 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25357 asm_fprintf (asm_out_file, "\n");
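/* As an illustration (assuming the usual ARM assembler comment character
   '@' for ASM_COMMENT_START), arm_emit_eabi_attribute ("Tag_ABI_FP_denormal",
   20, 1) under -fverbose-asm would emit roughly:
        .eabi_attribute 20, 1   @ Tag_ABI_FP_denormal  */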
25360 static void
25361 arm_file_start (void)
25363 int val;
25365 if (TARGET_UNIFIED_ASM)
25366 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25368 if (TARGET_BPABI)
25370 const char *fpu_name;
25371 if (arm_selected_arch)
25373 const char* pos = strchr (arm_selected_arch->name, '+');
25374 if (pos)
25376 char buf[15];
25377 gcc_assert (strlen (arm_selected_arch->name)
25378 <= sizeof (buf) / sizeof (*pos));
25379 strncpy (buf, arm_selected_arch->name,
25380 (pos - arm_selected_arch->name) * sizeof (*pos));
25381 buf[pos - arm_selected_arch->name] = '\0';
25382 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25383 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25385 else
25386 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25388 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25389 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25390 else
25391 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
25393 if (TARGET_SOFT_FLOAT)
25395 fpu_name = "softvfp";
25397 else
25399 fpu_name = arm_fpu_desc->name;
25400 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25402 if (TARGET_HARD_FLOAT)
25403 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
25404 if (TARGET_HARD_FLOAT_ABI)
25405 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25408 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25410 /* Some of these attributes only apply when the corresponding features
25411 are used. However we don't have any easy way of figuring this out.
25412 Conservatively record the setting that would have been used. */
25414 if (flag_rounding_math)
25415 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25417 if (!flag_unsafe_math_optimizations)
25419 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25420 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25422 if (flag_signaling_nans)
25423 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25425 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25426 flag_finite_math_only ? 1 : 3);
25428 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25429 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25430 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25431 flag_short_enums ? 1 : 2);
25433 /* Tag_ABI_optimization_goals. */
25434 if (optimize_size)
25435 val = 4;
25436 else if (optimize >= 2)
25437 val = 2;
25438 else if (optimize)
25439 val = 1;
25440 else
25441 val = 6;
25442 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25444 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25445 unaligned_access);
25447 if (arm_fp16_format)
25448 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25449 (int) arm_fp16_format);
25451 if (arm_lang_output_object_attributes_hook)
25452 arm_lang_output_object_attributes_hook();
25455 default_file_start ();
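/* Finish the assembly file: emit the .note.GNU-stack marker when required
   and, if any Thumb call-via-register stubs were used, switch back to the
   text section and output one "bx rN" stub (with its label) per register.  */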
25458 static void
25459 arm_file_end (void)
25461 int regno;
25463 if (NEED_INDICATE_EXEC_STACK)
25464 /* Add .note.GNU-stack. */
25465 file_end_indicate_exec_stack ();
25467 if (! thumb_call_reg_needed)
25468 return;
25470 switch_to_section (text_section);
25471 asm_fprintf (asm_out_file, "\t.code 16\n");
25472 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25474 for (regno = 0; regno < LR_REGNUM; regno++)
25476 rtx label = thumb_call_via_label[regno];
25478 if (label != 0)
25480 targetm.asm_out.internal_label (asm_out_file, "L",
25481 CODE_LABEL_NUMBER (label));
25482 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25487 #ifndef ARM_PE
25488 /* Symbols in the text segment can be accessed without indirecting via the
25489 constant pool; it may take an extra binary operation, but this is still
25490 faster than indirecting via memory. Don't do this when not optimizing,
25491 since we won't be calculating all of the offsets necessary to do this
25492 simplification. */
25494 static void
25495 arm_encode_section_info (tree decl, rtx rtl, int first)
25497 if (optimize > 0 && TREE_CONSTANT (decl))
25498 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25500 default_encode_section_info (decl, rtl, first);
25502 #endif /* !ARM_PE */
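/* Output an internal label.  If this is the label the conditional
   execution state machine is currently waiting for, reset that state
   before emitting it.  */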
25504 static void
25505 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25507 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25508 && !strcmp (prefix, "L"))
25510 arm_ccfsm_state = 0;
25511 arm_target_insn = NULL;
25513 default_internal_label (stream, prefix, labelno);
25516 /* Output code to add DELTA to the first argument, and then jump
25517 to FUNCTION. Used for C++ multiple inheritance. */
25518 static void
25519 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25520 HOST_WIDE_INT delta,
25521 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25522 tree function)
25524 static int thunk_label = 0;
25525 char label[256];
25526 char labelpc[256];
25527 int mi_delta = delta;
25528 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25529 int shift = 0;
25530 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25531 ? 1 : 0);
25532 if (mi_delta < 0)
25533 mi_delta = - mi_delta;
25535 final_start_function (emit_barrier (), file, 1);
25537 if (TARGET_THUMB1)
25539 int labelno = thunk_label++;
25540 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25541 /* Thunks are entered in ARM mode when available. */
25542 if (TARGET_THUMB1_ONLY)
25544 /* push r3 so we can use it as a temporary. */
25545 /* TODO: Omit this save if r3 is not used. */
25546 fputs ("\tpush {r3}\n", file);
25547 fputs ("\tldr\tr3, ", file);
25549 else
25551 fputs ("\tldr\tr12, ", file);
25553 assemble_name (file, label);
25554 fputc ('\n', file);
25555 if (flag_pic)
25557 /* If we are generating PIC, the ldr instruction below loads
25558 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25559 the address of the add + 8, so we have:
25561 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25562 = target + 1.
25564 Note that we have "+ 1" because some versions of GNU ld
25565 don't set the low bit of the result for R_ARM_REL32
25566 relocations against thumb function symbols.
25567 On ARMv6M this is +4, not +8. */
25568 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25569 assemble_name (file, labelpc);
25570 fputs (":\n", file);
25571 if (TARGET_THUMB1_ONLY)
25573 /* This is 2 insns after the start of the thunk, so we know it
25574 is 4-byte aligned. */
25575 fputs ("\tadd\tr3, pc, r3\n", file);
25576 fputs ("\tmov r12, r3\n", file);
25578 else
25579 fputs ("\tadd\tr12, pc, r12\n", file);
25581 else if (TARGET_THUMB1_ONLY)
25582 fputs ("\tmov r12, r3\n", file);
25584 if (TARGET_THUMB1_ONLY)
25586 if (mi_delta > 255)
25588 fputs ("\tldr\tr3, ", file);
25589 assemble_name (file, label);
25590 fputs ("+4\n", file);
25591 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
25592 mi_op, this_regno, this_regno);
25594 else if (mi_delta != 0)
25596 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25597 mi_op, this_regno, this_regno,
25598 mi_delta);
25601 else
25603 /* TODO: Use movw/movt for large constants when available. */
25604 while (mi_delta != 0)
25606 if ((mi_delta & (3 << shift)) == 0)
25607 shift += 2;
25608 else
25610 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25611 mi_op, this_regno, this_regno,
25612 mi_delta & (0xff << shift));
25613 mi_delta &= ~(0xff << shift);
25614 shift += 8;
25618 if (TARGET_THUMB1)
25620 if (TARGET_THUMB1_ONLY)
25621 fputs ("\tpop\t{r3}\n", file);
25623 fprintf (file, "\tbx\tr12\n");
25624 ASM_OUTPUT_ALIGN (file, 2);
25625 assemble_name (file, label);
25626 fputs (":\n", file);
25627 if (flag_pic)
25629 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
25630 rtx tem = XEXP (DECL_RTL (function), 0);
25631 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
25632 tem = gen_rtx_MINUS (GET_MODE (tem),
25633 tem,
25634 gen_rtx_SYMBOL_REF (Pmode,
25635 ggc_strdup (labelpc)));
25636 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25638 else
25639 /* Output ".word .LTHUNKn". */
25640 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25642 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25643 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25645 else
25647 fputs ("\tb\t", file);
25648 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25649 if (NEED_PLT_RELOC)
25650 fputs ("(PLT)", file);
25651 fputc ('\n', file);
25654 final_end_function ();
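/* Output CONST_VECTOR X to FILE as a single hexadecimal constant,
   most significant element first, using a per-element width matching the
   vector mode (8, 4 or 2 hex digits).  Returns 1.  */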
25658 arm_emit_vector_const (FILE *file, rtx x)
25660 int i;
25661 const char * pattern;
25663 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25665 switch (GET_MODE (x))
25667 case V2SImode: pattern = "%08x"; break;
25668 case V4HImode: pattern = "%04x"; break;
25669 case V8QImode: pattern = "%02x"; break;
25670 default: gcc_unreachable ();
25673 fprintf (file, "0x");
25674 for (i = CONST_VECTOR_NUNITS (x); i--;)
25676 rtx element;
25678 element = CONST_VECTOR_ELT (x, i);
25679 fprintf (file, pattern, INTVAL (element));
25682 return 1;
25685 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25686 HFmode constant pool entries are actually loaded with ldr. */
25687 void
25688 arm_emit_fp16_const (rtx c)
25690 REAL_VALUE_TYPE r;
25691 long bits;
25693 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25694 bits = real_to_target (NULL, &r, HFmode);
25695 if (WORDS_BIG_ENDIAN)
25696 assemble_zeros (2);
25697 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25698 if (!WORDS_BIG_ENDIAN)
25699 assemble_zeros (2);
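/* Output the assembly for loading an iWMMXt GR register from memory.
   Unless the address is a register plus an out-of-range constant offset,
   a single wldrw is emitted; otherwise the value is bounced through the
   base register, which is temporarily spilled to the stack.  Roughly
   (register names illustrative):
        str     rB, [sp, #-4]!
        ldr     rB, <address>
        tmcr    wcgrN, rB
        ldr     rB, [sp], #4  */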
25702 const char *
25703 arm_output_load_gr (rtx *operands)
25705 rtx reg;
25706 rtx offset;
25707 rtx wcgr;
25708 rtx sum;
25710 if (!MEM_P (operands [1])
25711 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25712 || !REG_P (reg = XEXP (sum, 0))
25713 || !CONST_INT_P (offset = XEXP (sum, 1))
25714 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25715 return "wldrw%?\t%0, %1";
25717 /* Fix up an out-of-range load of a GR register. */
25718 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25719 wcgr = operands[0];
25720 operands[0] = reg;
25721 output_asm_insn ("ldr%?\t%0, %1", operands);
25723 operands[0] = wcgr;
25724 operands[1] = reg;
25725 output_asm_insn ("tmcr%?\t%0, %1", operands);
25726 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25728 return "";
25731 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25733 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25734 named arg and all anonymous args onto the stack.
25735 XXX I know the prologue shouldn't be pushing registers, but it is faster
25736 that way. */
25738 static void
25739 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25740 enum machine_mode mode,
25741 tree type,
25742 int *pretend_size,
25743 int second_time ATTRIBUTE_UNUSED)
25745 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25746 int nregs;
25748 cfun->machine->uses_anonymous_args = 1;
25749 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25751 nregs = pcum->aapcs_ncrn;
25752 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25753 nregs++;
25755 else
25756 nregs = pcum->nregs;
25758 if (nregs < NUM_ARG_REGS)
25759 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25762 /* Return nonzero if the CONSUMER instruction (a store) does not need
25763 PRODUCER's value to calculate the address. */
25766 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
25768 rtx value = PATTERN (producer);
25769 rtx addr = PATTERN (consumer);
25771 if (GET_CODE (value) == COND_EXEC)
25772 value = COND_EXEC_CODE (value);
25773 if (GET_CODE (value) == PARALLEL)
25774 value = XVECEXP (value, 0, 0);
25775 value = XEXP (value, 0);
25776 if (GET_CODE (addr) == COND_EXEC)
25777 addr = COND_EXEC_CODE (addr);
25778 if (GET_CODE (addr) == PARALLEL)
25779 addr = XVECEXP (addr, 0, 0);
25780 addr = XEXP (addr, 0);
25782 return !reg_overlap_mentioned_p (value, addr);
25785 /* Return nonzero if the CONSUMER instruction (a store) does need
25786 PRODUCER's value to calculate the address. */
25789 arm_early_store_addr_dep (rtx producer, rtx consumer)
25791 return !arm_no_early_store_addr_dep (producer, consumer);
25794 /* Return nonzero if the CONSUMER instruction (a load) does need
25795 PRODUCER's value to calculate the address. */
25798 arm_early_load_addr_dep (rtx producer, rtx consumer)
25800 rtx value = PATTERN (producer);
25801 rtx addr = PATTERN (consumer);
25803 if (GET_CODE (value) == COND_EXEC)
25804 value = COND_EXEC_CODE (value);
25805 if (GET_CODE (value) == PARALLEL)
25806 value = XVECEXP (value, 0, 0);
25807 value = XEXP (value, 0);
25808 if (GET_CODE (addr) == COND_EXEC)
25809 addr = COND_EXEC_CODE (addr);
25810 if (GET_CODE (addr) == PARALLEL)
25812 if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
25813 addr = XVECEXP (addr, 0, 1);
25814 else
25815 addr = XVECEXP (addr, 0, 0);
25817 addr = XEXP (addr, 1);
25819 return reg_overlap_mentioned_p (value, addr);
25822 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
25823 have an early register shift value or amount dependency on the
25824 result of PRODUCER. */
25827 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
25829 rtx value = PATTERN (producer);
25830 rtx op = PATTERN (consumer);
25831 rtx early_op;
25833 if (GET_CODE (value) == COND_EXEC)
25834 value = COND_EXEC_CODE (value);
25835 if (GET_CODE (value) == PARALLEL)
25836 value = XVECEXP (value, 0, 0);
25837 value = XEXP (value, 0);
25838 if (GET_CODE (op) == COND_EXEC)
25839 op = COND_EXEC_CODE (op);
25840 if (GET_CODE (op) == PARALLEL)
25841 op = XVECEXP (op, 0, 0);
25842 op = XEXP (op, 1);
25844 early_op = XEXP (op, 0);
25845 /* This is either an actual independent shift, or a shift applied to
25846 the first operand of another operation. We want the whole shift
25847 operation. */
25848 if (REG_P (early_op))
25849 early_op = op;
25851 return !reg_overlap_mentioned_p (value, early_op);
25854 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
25855 have an early register shift value dependency on the result of
25856 PRODUCER. */
25859 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
25861 rtx value = PATTERN (producer);
25862 rtx op = PATTERN (consumer);
25863 rtx early_op;
25865 if (GET_CODE (value) == COND_EXEC)
25866 value = COND_EXEC_CODE (value);
25867 if (GET_CODE (value) == PARALLEL)
25868 value = XVECEXP (value, 0, 0);
25869 value = XEXP (value, 0);
25870 if (GET_CODE (op) == COND_EXEC)
25871 op = COND_EXEC_CODE (op);
25872 if (GET_CODE (op) == PARALLEL)
25873 op = XVECEXP (op, 0, 0);
25874 op = XEXP (op, 1);
25876 early_op = XEXP (op, 0);
25878 /* This is either an actual independent shift, or a shift applied to
25879 the first operand of another operation. We want the value being
25880 shifted, in either case. */
25881 if (!REG_P (early_op))
25882 early_op = XEXP (early_op, 0);
25884 return !reg_overlap_mentioned_p (value, early_op);
25887 /* Return nonzero if the CONSUMER (a mul or mac op) does not
25888 have an early register mult dependency on the result of
25889 PRODUCER. */
25892 arm_no_early_mul_dep (rtx producer, rtx consumer)
25894 rtx value = PATTERN (producer);
25895 rtx op = PATTERN (consumer);
25897 if (GET_CODE (value) == COND_EXEC)
25898 value = COND_EXEC_CODE (value);
25899 if (GET_CODE (value) == PARALLEL)
25900 value = XVECEXP (value, 0, 0);
25901 value = XEXP (value, 0);
25902 if (GET_CODE (op) == COND_EXEC)
25903 op = COND_EXEC_CODE (op);
25904 if (GET_CODE (op) == PARALLEL)
25905 op = XVECEXP (op, 0, 0);
25906 op = XEXP (op, 1);
25908 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
25910 if (GET_CODE (XEXP (op, 0)) == MULT)
25911 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
25912 else
25913 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
25916 return 0;
25919 /* We can't rely on the caller doing the proper promotion when
25920 using APCS or ATPCS. */
25922 static bool
25923 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25925 return !TARGET_AAPCS_BASED;
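/* Worker for the promote_function_mode target hook: promote integer
   arguments and return values narrower than a word to SImode.  */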
25928 static enum machine_mode
25929 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25930 enum machine_mode mode,
25931 int *punsignedp ATTRIBUTE_UNUSED,
25932 const_tree fntype ATTRIBUTE_UNUSED,
25933 int for_return ATTRIBUTE_UNUSED)
25935 if (GET_MODE_CLASS (mode) == MODE_INT
25936 && GET_MODE_SIZE (mode) < 4)
25937 return SImode;
25939 return mode;
25942 /* AAPCS based ABIs use short enums by default. */
25944 static bool
25945 arm_default_short_enums (void)
25947 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25951 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25953 static bool
25954 arm_align_anon_bitfield (void)
25956 return TARGET_AAPCS_BASED;
25960 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25962 static tree
25963 arm_cxx_guard_type (void)
25965 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25968 /* Return non-zero iff the consumer (a multiply-accumulate or a
25969 multiple-subtract instruction) has an accumulator dependency on the
25970 result of the producer and no other dependency on that result. It
25971 does not check if the producer is multiply-accumulate instruction. */
25973 arm_mac_accumulator_is_result (rtx producer, rtx consumer)
25975 rtx result;
25976 rtx op0, op1, acc;
25978 producer = PATTERN (producer);
25979 consumer = PATTERN (consumer);
25981 if (GET_CODE (producer) == COND_EXEC)
25982 producer = COND_EXEC_CODE (producer);
25983 if (GET_CODE (consumer) == COND_EXEC)
25984 consumer = COND_EXEC_CODE (consumer);
25986 if (GET_CODE (producer) != SET)
25987 return 0;
25989 result = XEXP (producer, 0);
25991 if (GET_CODE (consumer) != SET)
25992 return 0;
25994 /* Check that the consumer is of the form
25995 (set (...) (plus (mult ...) (...)))
25997 (set (...) (minus (...) (mult ...))). */
25998 if (GET_CODE (XEXP (consumer, 1)) == PLUS)
26000 if (GET_CODE (XEXP (XEXP (consumer, 1), 0)) != MULT)
26001 return 0;
26003 op0 = XEXP (XEXP (XEXP (consumer, 1), 0), 0);
26004 op1 = XEXP (XEXP (XEXP (consumer, 1), 0), 1);
26005 acc = XEXP (XEXP (consumer, 1), 1);
26007 else if (GET_CODE (XEXP (consumer, 1)) == MINUS)
26009 if (GET_CODE (XEXP (XEXP (consumer, 1), 1)) != MULT)
26010 return 0;
26012 op0 = XEXP (XEXP (XEXP (consumer, 1), 1), 0);
26013 op1 = XEXP (XEXP (XEXP (consumer, 1), 1), 1);
26014 acc = XEXP (XEXP (consumer, 1), 0);
26016 else
26017 return 0;
26019 return (reg_overlap_mentioned_p (result, acc)
26020 && !reg_overlap_mentioned_p (result, op0)
26021 && !reg_overlap_mentioned_p (result, op1));
26024 /* Return non-zero if the consumer (a multiply-accumulate instruction)
26025 has an accumulator dependency on the result of the producer (a
26026 multiplication instruction) and no other dependency on that result. */
26028 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
26030 rtx mul = PATTERN (producer);
26031 rtx mac = PATTERN (consumer);
26032 rtx mul_result;
26033 rtx mac_op0, mac_op1, mac_acc;
26035 if (GET_CODE (mul) == COND_EXEC)
26036 mul = COND_EXEC_CODE (mul);
26037 if (GET_CODE (mac) == COND_EXEC)
26038 mac = COND_EXEC_CODE (mac);
26040 /* Check that mul is of the form (set (...) (mult ...))
26041 and mla is of the form (set (...) (plus (mult ...) (...))). */
26042 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
26043 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
26044 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
26045 return 0;
26047 mul_result = XEXP (mul, 0);
26048 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
26049 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
26050 mac_acc = XEXP (XEXP (mac, 1), 1);
26052 return (reg_overlap_mentioned_p (mul_result, mac_acc)
26053 && !reg_overlap_mentioned_p (mul_result, mac_op0)
26054 && !reg_overlap_mentioned_p (mul_result, mac_op1));
26058 /* The EABI says to test the least significant bit of a guard variable. */
26060 static bool
26061 arm_cxx_guard_mask_bit (void)
26063 return TARGET_AAPCS_BASED;
26067 /* The EABI specifies that all array cookies are 8 bytes long. */
26069 static tree
26070 arm_get_cookie_size (tree type)
26072 tree size;
26074 if (!TARGET_AAPCS_BASED)
26075 return default_cxx_get_cookie_size (type);
26077 size = build_int_cst (sizetype, 8);
26078 return size;
26082 /* The EABI says that array cookies should also contain the element size. */
26084 static bool
26085 arm_cookie_has_size (void)
26087 return TARGET_AAPCS_BASED;
26091 /* The EABI says constructors and destructors should return a pointer to
26092 the object constructed/destroyed. */
26094 static bool
26095 arm_cxx_cdtor_returns_this (void)
26097 return TARGET_AAPCS_BASED;
26100 /* The EABI says that an inline function may never be the key
26101 method. */
26103 static bool
26104 arm_cxx_key_method_may_be_inline (void)
26106 return !TARGET_AAPCS_BASED;
26109 static void
26110 arm_cxx_determine_class_data_visibility (tree decl)
26112 if (!TARGET_AAPCS_BASED
26113 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26114 return;
26116 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26117 is exported. However, on systems without dynamic vague linkage,
26118 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26119 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26120 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26121 else
26122 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26123 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26126 static bool
26127 arm_cxx_class_data_always_comdat (void)
26129 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26130 vague linkage if the class has no key function. */
26131 return !TARGET_AAPCS_BASED;
26135 /* The EABI says __aeabi_atexit should be used to register static
26136 destructors. */
26138 static bool
26139 arm_cxx_use_aeabi_atexit (void)
26141 return TARGET_AAPCS_BASED;
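/* Store SOURCE into the stack slot that holds the return address of the
   current function, or directly into LR if LR was not saved.  SCRATCH may
   be used to form the address when the offset from the stack pointer is
   too large for a single immediate.  */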
26145 void
26146 arm_set_return_address (rtx source, rtx scratch)
26148 arm_stack_offsets *offsets;
26149 HOST_WIDE_INT delta;
26150 rtx addr;
26151 unsigned long saved_regs;
26153 offsets = arm_get_frame_offsets ();
26154 saved_regs = offsets->saved_regs_mask;
26156 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26157 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26158 else
26160 if (frame_pointer_needed)
26161 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26162 else
26164 /* LR will be the first saved register. */
26165 delta = offsets->outgoing_args - (offsets->frame + 4);
26168 if (delta >= 4096)
26170 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26171 GEN_INT (delta & ~4095)));
26172 addr = scratch;
26173 delta &= 4095;
26175 else
26176 addr = stack_pointer_rtx;
26178 addr = plus_constant (Pmode, addr, delta);
26180 emit_move_insn (gen_frame_mem (Pmode, addr), source);
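/* Thumb counterpart of arm_set_return_address.  The reach of Thumb
   load/store offsets is much smaller, so larger deltas are first
   materialised in SCRATCH before forming the address.  */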
26185 void
26186 thumb_set_return_address (rtx source, rtx scratch)
26188 arm_stack_offsets *offsets;
26189 HOST_WIDE_INT delta;
26190 HOST_WIDE_INT limit;
26191 int reg;
26192 rtx addr;
26193 unsigned long mask;
26195 emit_use (source);
26197 offsets = arm_get_frame_offsets ();
26198 mask = offsets->saved_regs_mask;
26199 if (mask & (1 << LR_REGNUM))
26201 limit = 1024;
26202 /* Find the saved regs. */
26203 if (frame_pointer_needed)
26205 delta = offsets->soft_frame - offsets->saved_args;
26206 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26207 if (TARGET_THUMB1)
26208 limit = 128;
26210 else
26212 delta = offsets->outgoing_args - offsets->saved_args;
26213 reg = SP_REGNUM;
26215 /* Allow for the stack frame. */
26216 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26217 delta -= 16;
26218 /* The link register is always the first saved register. */
26219 delta -= 4;
26221 /* Construct the address. */
26222 addr = gen_rtx_REG (SImode, reg);
26223 if (delta > limit)
26225 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26226 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26227 addr = scratch;
26229 else
26230 addr = plus_constant (Pmode, addr, delta);
26232 emit_move_insn (gen_frame_mem (Pmode, addr), source);
26234 else
26235 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26238 /* Implements target hook vector_mode_supported_p. */
26239 bool
26240 arm_vector_mode_supported_p (enum machine_mode mode)
26242 /* Neon also supports V2SImode, etc. listed in the clause below. */
26243 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26244 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26245 return true;
26247 if ((TARGET_NEON || TARGET_IWMMXT)
26248 && ((mode == V2SImode)
26249 || (mode == V4HImode)
26250 || (mode == V8QImode)))
26251 return true;
26253 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26254 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26255 || mode == V2HAmode))
26256 return true;
26258 return false;
26261 /* Implements target hook array_mode_supported_p. */
26263 static bool
26264 arm_array_mode_supported_p (enum machine_mode mode,
26265 unsigned HOST_WIDE_INT nelems)
26267 if (TARGET_NEON
26268 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26269 && (nelems >= 2 && nelems <= 4))
26270 return true;
26272 return false;
26275 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26276 registers when autovectorizing for Neon, at least until multiple vector
26277 widths are supported properly by the middle-end. */
26279 static enum machine_mode
26280 arm_preferred_simd_mode (enum machine_mode mode)
26282 if (TARGET_NEON)
26283 switch (mode)
26285 case SFmode:
26286 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26287 case SImode:
26288 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26289 case HImode:
26290 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26291 case QImode:
26292 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26293 case DImode:
26294 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26295 return V2DImode;
26296 break;
26298 default:;
26301 if (TARGET_REALLY_IWMMXT)
26302 switch (mode)
26304 case SImode:
26305 return V2SImode;
26306 case HImode:
26307 return V4HImode;
26308 case QImode:
26309 return V8QImode;
26311 default:;
26314 return word_mode;
26317 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26319 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26320 using r0-r4 for function arguments and r7 for the stack frame, and not have
26321 enough left over to do doubleword arithmetic. For Thumb-2 all the
26322 potentially problematic instructions accept high registers so this is not
26323 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26324 that require many low registers. */
26325 static bool
26326 arm_class_likely_spilled_p (reg_class_t rclass)
26328 if ((TARGET_THUMB1 && rclass == LO_REGS)
26329 || rclass == CC_REG)
26330 return true;
26332 return false;
26335 /* Implements target hook small_register_classes_for_mode_p. */
26336 bool
26337 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
26339 return TARGET_THUMB1;
26342 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26343 ARM insns and therefore guarantee that the shift count is modulo 256.
26344 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26345 guarantee no particular behavior for out-of-range counts. */
26347 static unsigned HOST_WIDE_INT
26348 arm_shift_truncation_mask (enum machine_mode mode)
26350 return mode == SImode ? 255 : 0;
26354 /* Map internal gcc register numbers to DWARF2 register numbers. */
26356 unsigned int
26357 arm_dbx_register_number (unsigned int regno)
26359 if (regno < 16)
26360 return regno;
26362 if (IS_VFP_REGNUM (regno))
26364 /* See comment in arm_dwarf_register_span. */
26365 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26366 return 64 + regno - FIRST_VFP_REGNUM;
26367 else
26368 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26371 if (IS_IWMMXT_GR_REGNUM (regno))
26372 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26374 if (IS_IWMMXT_REGNUM (regno))
26375 return 112 + regno - FIRST_IWMMXT_REGNUM;
26377 gcc_unreachable ();
26380 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26381 GCC models them as 64 32-bit registers, so we need to describe this to
26382 the DWARF generation code. Other registers can use the default. */
26383 static rtx
26384 arm_dwarf_register_span (rtx rtl)
26386 unsigned regno;
26387 int nregs;
26388 int i;
26389 rtx p;
26391 regno = REGNO (rtl);
26392 if (!IS_VFP_REGNUM (regno))
26393 return NULL_RTX;
26395 /* XXX FIXME: The EABI defines two VFP register ranges:
26396 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26397 256-287: D0-D31
26398 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26399 corresponding D register. Until GDB supports this, we shall use the
26400 legacy encodings. We also use these encodings for D0-D15 for
26401 compatibility with older debuggers. */
26402 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26403 return NULL_RTX;
26405 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
26406 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
26407 for (i = 0; i < nregs; i++)
26408 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, regno + i);
26410 return p;
26413 #if ARM_UNWIND_INFO
26414 /* Emit unwind directives for a store-multiple instruction or stack pointer
26415 push during alignment.
26416 These should only ever be generated by the function prologue code, so
26417 we expect them to have a particular form. */
26419 static void
26420 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26422 int i;
26423 HOST_WIDE_INT offset;
26424 HOST_WIDE_INT nregs;
26425 int reg_size;
26426 unsigned reg;
26427 unsigned lastreg;
26428 rtx e;
26430 e = XVECEXP (p, 0, 0);
26431 if (GET_CODE (e) != SET)
26432 abort ();
26434 /* First insn will adjust the stack pointer. */
26435 if (GET_CODE (e) != SET
26436 || !REG_P (XEXP (e, 0))
26437 || REGNO (XEXP (e, 0)) != SP_REGNUM
26438 || GET_CODE (XEXP (e, 1)) != PLUS)
26439 abort ();
26441 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
26442 nregs = XVECLEN (p, 0) - 1;
26444 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
26445 if (reg < 16)
26447 /* The function prologue may also push pc, but does not annotate it, as it
26448 is never restored. We turn this into a stack pointer adjustment. */
26449 if (nregs * 4 == offset - 4)
26451 fprintf (asm_out_file, "\t.pad #4\n");
26452 offset -= 4;
26454 reg_size = 4;
26455 fprintf (asm_out_file, "\t.save {");
26457 else if (IS_VFP_REGNUM (reg))
26459 reg_size = 8;
26460 fprintf (asm_out_file, "\t.vsave {");
26462 else
26463 /* Unknown register type. */
26464 abort ();
26466 /* If the stack increment doesn't match the size of the saved registers,
26467 something has gone horribly wrong. */
26468 if (offset != nregs * reg_size)
26469 abort ();
26471 offset = 0;
26472 lastreg = 0;
26473 /* The remaining insns will describe the stores. */
26474 for (i = 1; i <= nregs; i++)
26476 /* Expect (set (mem <addr>) (reg)).
26477 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26478 e = XVECEXP (p, 0, i);
26479 if (GET_CODE (e) != SET
26480 || !MEM_P (XEXP (e, 0))
26481 || !REG_P (XEXP (e, 1)))
26482 abort ();
26484 reg = REGNO (XEXP (e, 1));
26485 if (reg < lastreg)
26486 abort ();
26488 if (i != 1)
26489 fprintf (asm_out_file, ", ");
26490 /* We can't use %r for vfp because we need to use the
26491 double precision register names. */
26492 if (IS_VFP_REGNUM (reg))
26493 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26494 else
26495 asm_fprintf (asm_out_file, "%r", reg);
26497 #ifdef ENABLE_CHECKING
26498 /* Check that the addresses are consecutive. */
26499 e = XEXP (XEXP (e, 0), 0);
26500 if (GET_CODE (e) == PLUS)
26502 offset += reg_size;
26503 if (!REG_P (XEXP (e, 0))
26504 || REGNO (XEXP (e, 0)) != SP_REGNUM
26505 || !CONST_INT_P (XEXP (e, 1))
26506 || offset != INTVAL (XEXP (e, 1)))
26507 abort ();
26509 else if (i != 1
26510 || !REG_P (e)
26511 || REGNO (e) != SP_REGNUM)
26512 abort ();
26513 #endif
26515 fprintf (asm_out_file, "}\n");
26518 /* Emit unwind directives for a SET. */
26520 static void
26521 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26523 rtx e0;
26524 rtx e1;
26525 unsigned reg;
26527 e0 = XEXP (p, 0);
26528 e1 = XEXP (p, 1);
26529 switch (GET_CODE (e0))
26531 case MEM:
26532 /* Pushing a single register. */
26533 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26534 || !REG_P (XEXP (XEXP (e0, 0), 0))
26535 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26536 abort ();
26538 asm_fprintf (asm_out_file, "\t.save ");
26539 if (IS_VFP_REGNUM (REGNO (e1)))
26540 asm_fprintf(asm_out_file, "{d%d}\n",
26541 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26542 else
26543 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26544 break;
26546 case REG:
26547 if (REGNO (e0) == SP_REGNUM)
26549 /* A stack increment. */
26550 if (GET_CODE (e1) != PLUS
26551 || !REG_P (XEXP (e1, 0))
26552 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26553 || !CONST_INT_P (XEXP (e1, 1)))
26554 abort ();
26556 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26557 -INTVAL (XEXP (e1, 1)));
26559 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26561 HOST_WIDE_INT offset;
26563 if (GET_CODE (e1) == PLUS)
26565 if (!REG_P (XEXP (e1, 0))
26566 || !CONST_INT_P (XEXP (e1, 1)))
26567 abort ();
26568 reg = REGNO (XEXP (e1, 0));
26569 offset = INTVAL (XEXP (e1, 1));
26570 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26571 HARD_FRAME_POINTER_REGNUM, reg,
26572 offset);
26574 else if (REG_P (e1))
26576 reg = REGNO (e1);
26577 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26578 HARD_FRAME_POINTER_REGNUM, reg);
26580 else
26581 abort ();
26583 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26585 /* Move from sp to reg. */
26586 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26588 else if (GET_CODE (e1) == PLUS
26589 && REG_P (XEXP (e1, 0))
26590 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26591 && CONST_INT_P (XEXP (e1, 1)))
26593 /* Set reg to offset from sp. */
26594 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26595 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26597 else
26598 abort ();
26599 break;
26601 default:
26602 abort ();
26607 /* Emit unwind directives for the given insn. */
26609 static void
26610 arm_unwind_emit (FILE * asm_out_file, rtx insn)
26612 rtx note, pat;
26613 bool handled_one = false;
26615 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26616 return;
26618 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26619 && (TREE_NOTHROW (current_function_decl)
26620 || crtl->all_throwers_are_sibcalls))
26621 return;
26623 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26624 return;
26626 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26628 pat = XEXP (note, 0);
26629 switch (REG_NOTE_KIND (note))
26631 case REG_FRAME_RELATED_EXPR:
26632 goto found;
26634 case REG_CFA_REGISTER:
26635 if (pat == NULL)
26637 pat = PATTERN (insn);
26638 if (GET_CODE (pat) == PARALLEL)
26639 pat = XVECEXP (pat, 0, 0);
26642 /* Only emitted for IS_STACKALIGN re-alignment. */
26644 rtx dest, src;
26645 unsigned reg;
26647 src = SET_SRC (pat);
26648 dest = SET_DEST (pat);
26650 gcc_assert (src == stack_pointer_rtx);
26651 reg = REGNO (dest);
26652 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26653 reg + 0x90, reg);
26655 handled_one = true;
26656 break;
26658 /* The INSN is generated in the epilogue. It is marked RTX_FRAME_RELATED_P
26659 to get correct dwarf information for shrink-wrapping. We should not
26660 emit unwind information for it because these notes are used either for
26661 pretend arguments or to adjust sp and restore registers from the
26662 stack. */
26663 case REG_CFA_DEF_CFA:
26664 case REG_CFA_ADJUST_CFA:
26665 case REG_CFA_RESTORE:
26666 return;
26668 case REG_CFA_EXPRESSION:
26669 case REG_CFA_OFFSET:
26670 /* ??? Only handling here what we actually emit. */
26671 gcc_unreachable ();
26673 default:
26674 break;
26677 if (handled_one)
26678 return;
26679 pat = PATTERN (insn);
26680 found:
26682 switch (GET_CODE (pat))
26684 case SET:
26685 arm_unwind_emit_set (asm_out_file, pat);
26686 break;
26688 case SEQUENCE:
26689 /* Store multiple. */
26690 arm_unwind_emit_sequence (asm_out_file, pat);
26691 break;
26693 default:
26694 abort();
26699 /* Output a reference from a function exception table to the type_info
26700 object X. The EABI specifies that the symbol should be relocated by
26701 an R_ARM_TARGET2 relocation. */
26703 static bool
26704 arm_output_ttype (rtx x)
26706 fputs ("\t.word\t", asm_out_file);
26707 output_addr_const (asm_out_file, x);
26708 /* Use special relocations for symbol references. */
26709 if (!CONST_INT_P (x))
26710 fputs ("(TARGET2)", asm_out_file);
26711 fputc ('\n', asm_out_file);
26713 return TRUE;
26716 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26718 static void
26719 arm_asm_emit_except_personality (rtx personality)
26721 fputs ("\t.personality\t", asm_out_file);
26722 output_addr_const (asm_out_file, personality);
26723 fputc ('\n', asm_out_file);
26726 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26728 static void
26729 arm_asm_init_sections (void)
26731 exception_section = get_unnamed_section (0, output_section_asm_op,
26732 "\t.handlerdata");
26734 #endif /* ARM_UNWIND_INFO */
26736 /* Output unwind directives for the start/end of a function. */
26738 void
26739 arm_output_fn_unwind (FILE * f, bool prologue)
26741 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26742 return;
26744 if (prologue)
26745 fputs ("\t.fnstart\n", f);
26746 else
26748 /* If this function will never be unwound, then mark it as such.
26749 The same condition is used in arm_unwind_emit to suppress
26750 the frame annotations. */
26751 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26752 && (TREE_NOTHROW (current_function_decl)
26753 || crtl->all_throwers_are_sibcalls))
26754 fputs("\t.cantunwind\n", f);
26756 fputs ("\t.fnend\n", f);
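/* Editorial sketch (not part of the original source): with unwind tables
   enabled, the directives emitted above bracket each function, e.g.

       foo:
               .fnstart
               ...  @ body, plus .save/.setfp/.pad/.movsp records
               .fnend

   whereas a nothrow function compiled without unwind tables is instead
   marked with .cantunwind before its .fnend, so the unwinder never tries
   to unwind through it.  */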
26760 static bool
26761 arm_emit_tls_decoration (FILE *fp, rtx x)
26763 enum tls_reloc reloc;
26764 rtx val;
26766 val = XVECEXP (x, 0, 0);
26767 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26769 output_addr_const (fp, val);
26771 switch (reloc)
26773 case TLS_GD32:
26774 fputs ("(tlsgd)", fp);
26775 break;
26776 case TLS_LDM32:
26777 fputs ("(tlsldm)", fp);
26778 break;
26779 case TLS_LDO32:
26780 fputs ("(tlsldo)", fp);
26781 break;
26782 case TLS_IE32:
26783 fputs ("(gottpoff)", fp);
26784 break;
26785 case TLS_LE32:
26786 fputs ("(tpoff)", fp);
26787 break;
26788 case TLS_DESCSEQ:
26789 fputs ("(tlsdesc)", fp);
26790 break;
26791 default:
26792 gcc_unreachable ();
26795 switch (reloc)
26797 case TLS_GD32:
26798 case TLS_LDM32:
26799 case TLS_IE32:
26800 case TLS_DESCSEQ:
26801 fputs (" + (. - ", fp);
26802 output_addr_const (fp, XVECEXP (x, 0, 2));
26803 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26804 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26805 output_addr_const (fp, XVECEXP (x, 0, 3));
26806 fputc (')', fp);
26807 break;
26808 default:
26809 break;
26812 return TRUE;
26815 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26817 static void
26818 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26820 gcc_assert (size == 4);
26821 fputs ("\t.word\t", file);
26822 output_addr_const (file, x);
26823 fputs ("(tlsldo)", file);
26826 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26828 static bool
26829 arm_output_addr_const_extra (FILE *fp, rtx x)
26831 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26832 return arm_emit_tls_decoration (fp, x);
26833 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26835 char label[256];
26836 int labelno = INTVAL (XVECEXP (x, 0, 0));
26838 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26839 assemble_name_raw (fp, label);
26841 return TRUE;
26843 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26845 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26846 if (GOT_PCREL)
26847 fputs ("+.", fp);
26848 fputs ("-(", fp);
26849 output_addr_const (fp, XVECEXP (x, 0, 0));
26850 fputc (')', fp);
26851 return TRUE;
26853 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26855 output_addr_const (fp, XVECEXP (x, 0, 0));
26856 if (GOT_PCREL)
26857 fputs ("+.", fp);
26858 fputs ("-(", fp);
26859 output_addr_const (fp, XVECEXP (x, 0, 1));
26860 fputc (')', fp);
26861 return TRUE;
26863 else if (GET_CODE (x) == CONST_VECTOR)
26864 return arm_emit_vector_const (fp, x);
26866 return FALSE;
26869 /* Output assembly for a shift instruction.
26870 SET_FLAGS determines how the instruction modifies the condition codes.
26871 0 - Do not set condition codes.
26872 1 - Set condition codes.
26873 2 - Use smallest instruction. */
26874 const char *
26875 arm_output_shift(rtx * operands, int set_flags)
26877 char pattern[100];
26878 static const char flag_chars[3] = {'?', '.', '!'};
26879 const char *shift;
26880 HOST_WIDE_INT val;
26881 char c;
26883 c = flag_chars[set_flags];
26884 if (TARGET_UNIFIED_ASM)
26886 shift = shift_op(operands[3], &val);
26887 if (shift)
26889 if (val != -1)
26890 operands[2] = GEN_INT(val);
26891 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26893 else
26894 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26896 else
26897 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26898 output_asm_insn (pattern, operands);
26899 return "";
26902 /* Output assembly for a WMMX immediate shift instruction. */
26903 const char *
26904 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26906 int shift = INTVAL (operands[2]);
26907 char templ[50];
26908 enum machine_mode opmode = GET_MODE (operands[0]);
26910 gcc_assert (shift >= 0);
26912 /* If the shift value is larger than the operand width (> 63 for the D
26913 qualifier, > 31 for W, or > 15 for H), it needs special handling. */
26914 if (((opmode == V4HImode) && (shift > 15))
26915 || ((opmode == V2SImode) && (shift > 31))
26916 || ((opmode == DImode) && (shift > 63)))
26918 if (wror_or_wsra)
26920 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26921 output_asm_insn (templ, operands);
26922 if (opmode == DImode)
26924 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26925 output_asm_insn (templ, operands);
26928 else
26930 /* The destination register will contain all zeros. */
26931 sprintf (templ, "wzero\t%%0");
26932 output_asm_insn (templ, operands);
26934 return "";
26937 if ((opmode == DImode) && (shift > 32))
26939 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26940 output_asm_insn (templ, operands);
26941 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26942 output_asm_insn (templ, operands);
26944 else
26946 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26947 output_asm_insn (templ, operands);
26949 return "";
26952 /* Output assembly for a WMMX tinsr instruction. */
26953 const char *
26954 arm_output_iwmmxt_tinsr (rtx *operands)
26956 int mask = INTVAL (operands[3]);
26957 int i;
26958 char templ[50];
26959 int units = mode_nunits[GET_MODE (operands[0])];
26960 gcc_assert ((mask & (mask - 1)) == 0);
26961 for (i = 0; i < units; ++i)
26963 if ((mask & 0x01) == 1)
26965 break;
26967 mask >>= 1;
26969 gcc_assert (i < units);
26971 switch (GET_MODE (operands[0]))
26973 case V8QImode:
26974 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26975 break;
26976 case V4HImode:
26977 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26978 break;
26979 case V2SImode:
26980 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26981 break;
26982 default:
26983 gcc_unreachable ();
26984 break;
26986 output_asm_insn (templ, operands);
26988 return "";
26991 /* Output a Thumb-1 casesi dispatch sequence. */
26992 const char *
26993 thumb1_output_casesi (rtx *operands)
26995 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
26997 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26999 switch (GET_MODE(diff_vec))
27001 case QImode:
27002 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27003 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27004 case HImode:
27005 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27006 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27007 case SImode:
27008 return "bl\t%___gnu_thumb1_case_si";
27009 default:
27010 gcc_unreachable ();
27014 /* Output a Thumb-2 casesi instruction. */
27015 const char *
27016 thumb2_output_casesi (rtx *operands)
27018 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
27020 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27022 output_asm_insn ("cmp\t%0, %1", operands);
27023 output_asm_insn ("bhi\t%l3", operands);
27024 switch (GET_MODE(diff_vec))
27026 case QImode:
27027 return "tbb\t[%|pc, %0]";
27028 case HImode:
27029 return "tbh\t[%|pc, %0, lsl #1]";
27030 case SImode:
27031 if (flag_pic)
27033 output_asm_insn ("adr\t%4, %l2", operands);
27034 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27035 output_asm_insn ("add\t%4, %4, %5", operands);
27036 return "bx\t%4";
27038 else
27040 output_asm_insn ("adr\t%4, %l2", operands);
27041 return "ldr\t%|pc, [%4, %0, lsl #2]";
27043 default:
27044 gcc_unreachable ();
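/* Editorial sketch: for a QImode dispatch table the routine above is
   expected to emit a compact sequence along the lines of

       cmp     r0, r1          @ index against table bound
       bhi     .Ldefault       @ out of range -> default label
       tbb     [pc, r0]        @ branch via byte offset table

   with tbh used for HImode tables and an adr/ldr sequence for SImode
   (PIC additionally adds the loaded offset before branching).  */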
27048 /* Most ARM cores are single issue, but some newer ones can dual issue.
27049 The scheduler descriptions rely on this being correct. */
27050 static int
27051 arm_issue_rate (void)
27053 switch (arm_tune)
27055 case cortexa15:
27056 return 3;
27058 case cortexr4:
27059 case cortexr4f:
27060 case cortexr5:
27061 case genericv7a:
27062 case cortexa5:
27063 case cortexa7:
27064 case cortexa8:
27065 case cortexa9:
27066 case cortexa53:
27067 case fa726te:
27068 case marvell_pj4:
27069 return 2;
27071 default:
27072 return 1;
27076 /* A table and a function to perform ARM-specific name mangling for
27077 NEON vector types in order to conform to the AAPCS (see "Procedure
27078 Call Standard for the ARM Architecture", Appendix A). To qualify
27079 for emission with the mangled names defined in that document, a
27080 vector type must not only be of the correct mode but also be
27081 composed of NEON vector element types (e.g. __builtin_neon_qi). */
27082 typedef struct
27084 enum machine_mode mode;
27085 const char *element_type_name;
27086 const char *aapcs_name;
27087 } arm_mangle_map_entry;
27089 static arm_mangle_map_entry arm_mangle_map[] = {
27090 /* 64-bit containerized types. */
27091 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
27092 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
27093 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
27094 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
27095 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
27096 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
27097 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
27098 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
27099 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
27100 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
27102 /* 128-bit containerized types. */
27103 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
27104 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
27105 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
27106 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
27107 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
27108 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
27109 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
27110 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
27111 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
27112 { VOIDmode, NULL, NULL }
27115 const char *
27116 arm_mangle_type (const_tree type)
27118 arm_mangle_map_entry *pos = arm_mangle_map;
27120 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27121 has to be mangled as if it is in the "std" namespace. */
27122 if (TARGET_AAPCS_BASED
27123 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27124 return "St9__va_list";
27126 /* Half-precision float. */
27127 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27128 return "Dh";
27130 if (TREE_CODE (type) != VECTOR_TYPE)
27131 return NULL;
27133 /* Check the mode of the vector type, and the name of the vector
27134 element type, against the table. */
27135 while (pos->mode != VOIDmode)
27137 tree elt_type = TREE_TYPE (type);
27139 if (pos->mode == TYPE_MODE (type)
27140 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
27141 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
27142 pos->element_type_name))
27143 return pos->aapcs_name;
27145 pos++;
27148 /* Use the default mangling for unrecognized (possibly user-defined)
27149 vector types. */
27150 return NULL;
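/* Editorial example (assumes the arm_neon.h typedefs): int8x8_t is a
   V8QImode vector of __builtin_neon_qi elements, which the table above
   maps to "15__simd64_int8_t".  A declaration such as

       void f (int8x8_t);

   is therefore expected to mangle as _Z1f15__simd64_int8_t, while
   user-defined vector types fall back to the default mangling.  */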
27153 /* Order of allocation of core registers for Thumb: this allocation is
27154 written over the corresponding initial entries of the array
27155 initialized with REG_ALLOC_ORDER. We allocate all low registers
27156 first. Saving and restoring a low register is usually cheaper than
27157 using a call-clobbered high register. */
27159 static const int thumb_core_reg_alloc_order[] =
27161 3, 2, 1, 0, 4, 5, 6, 7,
27162 14, 12, 8, 9, 10, 11
27165 /* Adjust register allocation order when compiling for Thumb. */
27167 void
27168 arm_order_regs_for_local_alloc (void)
27170 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27171 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27172 if (TARGET_THUMB)
27173 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27174 sizeof (thumb_core_reg_alloc_order));
27177 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27179 bool
27180 arm_frame_pointer_required (void)
27182 return (cfun->has_nonlocal_label
27183 || SUBTARGET_FRAME_POINTER_REQUIRED
27184 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27187 /* Only thumb1 can't support conditional execution, so return true if
27188 the target is not thumb1. */
27189 static bool
27190 arm_have_conditional_execution (void)
27192 return !TARGET_THUMB1;
27195 tree
27196 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
27198 enum machine_mode in_mode, out_mode;
27199 int in_n, out_n;
27201 if (TREE_CODE (type_out) != VECTOR_TYPE
27202 || TREE_CODE (type_in) != VECTOR_TYPE
27203 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
27204 return NULL_TREE;
27206 out_mode = TYPE_MODE (TREE_TYPE (type_out));
27207 out_n = TYPE_VECTOR_SUBPARTS (type_out);
27208 in_mode = TYPE_MODE (TREE_TYPE (type_in));
27209 in_n = TYPE_VECTOR_SUBPARTS (type_in);
27211 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
27212 decl of the vectorized builtin for the appropriate vector mode.
27213 NULL_TREE is returned if no such builtin is available. */
27214 #undef ARM_CHECK_BUILTIN_MODE
27215 #define ARM_CHECK_BUILTIN_MODE(C) \
27216 (out_mode == SFmode && out_n == C \
27217 && in_mode == SFmode && in_n == C)
27219 #undef ARM_FIND_VRINT_VARIANT
27220 #define ARM_FIND_VRINT_VARIANT(N) \
27221 (ARM_CHECK_BUILTIN_MODE (2) \
27222 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
27223 : (ARM_CHECK_BUILTIN_MODE (4) \
27224 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
27225 : NULL_TREE))
27227 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
27229 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
27230 switch (fn)
27232 case BUILT_IN_FLOORF:
27233 return ARM_FIND_VRINT_VARIANT (vrintm);
27234 case BUILT_IN_CEILF:
27235 return ARM_FIND_VRINT_VARIANT (vrintp);
27236 case BUILT_IN_TRUNCF:
27237 return ARM_FIND_VRINT_VARIANT (vrintz);
27238 case BUILT_IN_ROUNDF:
27239 return ARM_FIND_VRINT_VARIANT (vrinta);
27240 default:
27241 return NULL_TREE;
27244 return NULL_TREE;
27246 #undef ARM_CHECK_BUILTIN_MODE
27247 #undef ARM_FIND_VRINT_VARIANT
27249 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27250 static HOST_WIDE_INT
27251 arm_vector_alignment (const_tree type)
27253 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
27255 if (TARGET_AAPCS_BASED)
27256 align = MIN (align, 64);
27258 return align;
27261 static unsigned int
27262 arm_autovectorize_vector_sizes (void)
27264 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27267 static bool
27268 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27270 /* Vectors which aren't in packed structures will not be less aligned than
27271 the natural alignment of their element type, so this is safe. */
27272 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
27273 return !is_packed;
27275 return default_builtin_vector_alignment_reachable (type, is_packed);
27278 static bool
27279 arm_builtin_support_vector_misalignment (enum machine_mode mode,
27280 const_tree type, int misalignment,
27281 bool is_packed)
27283 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
27285 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27287 if (is_packed)
27288 return align == 1;
27290 /* If the misalignment is unknown, we should be able to handle the access
27291 so long as it is not to a member of a packed data structure. */
27292 if (misalignment == -1)
27293 return true;
27295 /* Return true if the misalignment is a multiple of the natural alignment
27296 of the vector's element type. This is probably always going to be
27297 true in practice, since we've already established that this isn't a
27298 packed access. */
27299 return ((misalignment % align) == 0);
27302 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27303 is_packed);
27306 static void
27307 arm_conditional_register_usage (void)
27309 int regno;
27311 if (TARGET_THUMB1 && optimize_size)
27313 /* When optimizing for size on Thumb-1, it's better not
27314 to use the HI regs, because of the overhead of
27315 stacking them. */
27316 for (regno = FIRST_HI_REGNUM;
27317 regno <= LAST_HI_REGNUM; ++regno)
27318 fixed_regs[regno] = call_used_regs[regno] = 1;
27321 /* The link register can be clobbered by any branch insn,
27322 but we have no way to track that at present, so mark
27323 it as unavailable. */
27324 if (TARGET_THUMB1)
27325 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27327 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27329 /* VFPv3 registers are disabled when earlier VFP
27330 versions are selected due to the definition of
27331 LAST_VFP_REGNUM. */
27332 for (regno = FIRST_VFP_REGNUM;
27333 regno <= LAST_VFP_REGNUM; ++ regno)
27335 fixed_regs[regno] = 0;
27336 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27337 || regno >= FIRST_VFP_REGNUM + 32;
27341 if (TARGET_REALLY_IWMMXT)
27343 regno = FIRST_IWMMXT_GR_REGNUM;
27344 /* The 2002/10/09 revision of the XScale ABI has wCG0
27345 and wCG1 as call-preserved registers. The 2002/11/21
27346 revision changed this so that all wCG registers are
27347 scratch registers. */
27348 for (regno = FIRST_IWMMXT_GR_REGNUM;
27349 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27350 fixed_regs[regno] = 0;
27351 /* The XScale ABI has wR0 - wR9 as scratch registers,
27352 the rest as call-preserved registers. */
27353 for (regno = FIRST_IWMMXT_REGNUM;
27354 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27356 fixed_regs[regno] = 0;
27357 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27361 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27363 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27364 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27366 else if (TARGET_APCS_STACK)
27368 fixed_regs[10] = 1;
27369 call_used_regs[10] = 1;
27371 /* -mcaller-super-interworking reserves r11 for calls to
27372 _interwork_r11_call_via_rN(). Making the register global
27373 is an easy way of ensuring that it remains valid for all
27374 calls. */
27375 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27376 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27378 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27379 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27380 if (TARGET_CALLER_INTERWORKING)
27381 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27383 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27386 static reg_class_t
27387 arm_preferred_rename_class (reg_class_t rclass)
27389 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27390 using GENERAL_REGS. During the register rename pass we therefore
27391 prefer LO_REGS, which can reduce code size. */
27392 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27393 return LO_REGS;
27394 else
27395 return NO_REGS;
27398 /* Compute the attribute "length" of insn "*push_multi".
27399 So this function MUST be kept in sync with that insn pattern. */
27401 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27403 int i, regno, hi_reg;
27404 int num_saves = XVECLEN (parallel_op, 0);
27406 /* ARM mode. */
27407 if (TARGET_ARM)
27408 return 4;
27409 /* Thumb1 mode. */
27410 if (TARGET_THUMB1)
27411 return 2;
27413 /* Thumb2 mode. */
27414 regno = REGNO (first_op);
27415 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27416 for (i = 1; i < num_saves && !hi_reg; i++)
27418 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27419 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27422 if (!hi_reg)
27423 return 2;
27424 return 4;
27427 /* Compute the number of instructions emitted by output_move_double. */
27429 arm_count_output_move_double_insns (rtx *operands)
27431 int count;
27432 rtx ops[2];
27433 /* output_move_double may modify the operands array, so call it
27434 here on a copy of the array. */
27435 ops[0] = operands[0];
27436 ops[1] = operands[1];
27437 output_move_double (ops, false, &count);
27438 return count;
27442 vfp3_const_double_for_fract_bits (rtx operand)
27444 REAL_VALUE_TYPE r0;
27446 if (!CONST_DOUBLE_P (operand))
27447 return 0;
27449 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27450 if (exact_real_inverse (DFmode, &r0))
27452 if (exact_real_truncate (DFmode, &r0))
27454 HOST_WIDE_INT value = real_to_integer (&r0);
27455 value = value & 0xffffffff;
27456 if ((value != 0) && ( (value & (value - 1)) == 0))
27457 return int_log2 (value);
27460 return 0;
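/* Editorial note: for example, a CONST_DOUBLE of 0.125 has the exact
   inverse 8.0, so the routine above returns 3 (the number of fractional
   bits); constants whose inverse is not an exact power of two yield 0.  */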
27463 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27465 static void
27466 arm_pre_atomic_barrier (enum memmodel model)
27468 if (need_atomic_barrier_p (model, true))
27469 emit_insn (gen_memory_barrier ());
27472 static void
27473 arm_post_atomic_barrier (enum memmodel model)
27475 if (need_atomic_barrier_p (model, false))
27476 emit_insn (gen_memory_barrier ());
27479 /* Emit the load-exclusive and store-exclusive instructions.
27480 Use acquire and release versions if necessary. */
27482 static void
27483 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
27485 rtx (*gen) (rtx, rtx);
27487 if (acq)
27489 switch (mode)
27491 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27492 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27493 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27494 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27495 default:
27496 gcc_unreachable ();
27499 else
27501 switch (mode)
27503 case QImode: gen = gen_arm_load_exclusiveqi; break;
27504 case HImode: gen = gen_arm_load_exclusivehi; break;
27505 case SImode: gen = gen_arm_load_exclusivesi; break;
27506 case DImode: gen = gen_arm_load_exclusivedi; break;
27507 default:
27508 gcc_unreachable ();
27512 emit_insn (gen (rval, mem));
27515 static void
27516 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
27517 rtx mem, bool rel)
27519 rtx (*gen) (rtx, rtx, rtx);
27521 if (rel)
27523 switch (mode)
27525 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27526 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27527 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27528 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27529 default:
27530 gcc_unreachable ();
27533 else
27535 switch (mode)
27537 case QImode: gen = gen_arm_store_exclusiveqi; break;
27538 case HImode: gen = gen_arm_store_exclusivehi; break;
27539 case SImode: gen = gen_arm_store_exclusivesi; break;
27540 case DImode: gen = gen_arm_store_exclusivedi; break;
27541 default:
27542 gcc_unreachable ();
27546 emit_insn (gen (bval, rval, mem));
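/* Editorial note: for SImode the generators above are expected to expand
   to ldrex/strex; on targets with TARGET_HAVE_LDACQ (ARMv8 AArch32) the
   acquire/release forms ldaex/stlex are used instead, with byte, halfword
   and doubleword variants (ldrexb/ldrexh/ldrexd, etc.) covering the
   narrower and wider modes.  */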
27549 /* Mark the previous jump instruction as unlikely. */
27551 static void
27552 emit_unlikely_jump (rtx insn)
27554 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
27556 insn = emit_jump_insn (insn);
27557 add_reg_note (insn, REG_BR_PROB, very_unlikely);
27560 /* Expand a compare and swap pattern. */
27562 void
27563 arm_expand_compare_and_swap (rtx operands[])
27565 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27566 enum machine_mode mode;
27567 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27569 bval = operands[0];
27570 rval = operands[1];
27571 mem = operands[2];
27572 oldval = operands[3];
27573 newval = operands[4];
27574 is_weak = operands[5];
27575 mod_s = operands[6];
27576 mod_f = operands[7];
27577 mode = GET_MODE (mem);
27579 /* Normally the succ memory model must be stronger than fail, but in the
27580 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27581 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27583 if (TARGET_HAVE_LDACQ
27584 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27585 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27586 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27588 switch (mode)
27590 case QImode:
27591 case HImode:
27592 /* For narrow modes, we're going to perform the comparison in SImode,
27593 so do the zero-extension now. */
27594 rval = gen_reg_rtx (SImode);
27595 oldval = convert_modes (SImode, mode, oldval, true);
27596 /* FALLTHRU */
27598 case SImode:
27599 /* Force the value into a register if needed. We waited until after
27600 the zero-extension above to do this properly. */
27601 if (!arm_add_operand (oldval, SImode))
27602 oldval = force_reg (SImode, oldval);
27603 break;
27605 case DImode:
27606 if (!cmpdi_operand (oldval, mode))
27607 oldval = force_reg (mode, oldval);
27608 break;
27610 default:
27611 gcc_unreachable ();
27614 switch (mode)
27616 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27617 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27618 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27619 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27620 default:
27621 gcc_unreachable ();
27624 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27626 if (mode == QImode || mode == HImode)
27627 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27629 /* In all cases, we arrange for success to be signaled by Z set.
27630 This arrangement allows for the boolean result to be used directly
27631 in a subsequent branch, post optimization. */
27632 x = gen_rtx_REG (CCmode, CC_REGNUM);
27633 x = gen_rtx_EQ (SImode, x, const0_rtx);
27634 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27637 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27638 another memory store between the load-exclusive and store-exclusive can
27639 reset the monitor from Exclusive to Open state. This means we must wait
27640 until after reload to split the pattern, lest we get a register spill in
27641 the middle of the atomic sequence. */
27643 void
27644 arm_split_compare_and_swap (rtx operands[])
27646 rtx rval, mem, oldval, newval, scratch;
27647 enum machine_mode mode;
27648 enum memmodel mod_s, mod_f;
27649 bool is_weak;
27650 rtx label1, label2, x, cond;
27652 rval = operands[0];
27653 mem = operands[1];
27654 oldval = operands[2];
27655 newval = operands[3];
27656 is_weak = (operands[4] != const0_rtx);
27657 mod_s = (enum memmodel) INTVAL (operands[5]);
27658 mod_f = (enum memmodel) INTVAL (operands[6]);
27659 scratch = operands[7];
27660 mode = GET_MODE (mem);
27662 bool use_acquire = TARGET_HAVE_LDACQ
27663 && !(mod_s == MEMMODEL_RELAXED
27664 || mod_s == MEMMODEL_CONSUME
27665 || mod_s == MEMMODEL_RELEASE);
27667 bool use_release = TARGET_HAVE_LDACQ
27668 && !(mod_s == MEMMODEL_RELAXED
27669 || mod_s == MEMMODEL_CONSUME
27670 || mod_s == MEMMODEL_ACQUIRE);
27672 /* Checks whether a barrier is needed and emits one accordingly. */
27673 if (!(use_acquire || use_release))
27674 arm_pre_atomic_barrier (mod_s);
27676 label1 = NULL_RTX;
27677 if (!is_weak)
27679 label1 = gen_label_rtx ();
27680 emit_label (label1);
27682 label2 = gen_label_rtx ();
27684 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27686 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27687 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27688 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27689 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27690 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27692 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27694 /* Weak or strong, we want EQ to be true for success, so that we
27695 match the flags that we got from the compare above. */
27696 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27697 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27698 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27700 if (!is_weak)
27702 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27703 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27704 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27705 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27708 if (mod_f != MEMMODEL_RELAXED)
27709 emit_label (label2);
27711 /* Checks whether a barrier is needed and emits one accordingly. */
27712 if (!(use_acquire || use_release))
27713 arm_post_atomic_barrier (mod_s);
27715 if (mod_f == MEMMODEL_RELAXED)
27716 emit_label (label2);
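/* Editorial sketch (assuming SImode, a strong compare-and-swap,
   sequentially-consistent ordering and a pre-ARMv8 core, so explicit
   barriers are used): the split above is expected to produce a loop of
   roughly this shape:

       dmb                     @ full barrier
   1:  ldrex   rval, [mem]
       cmp     rval, oldval
       bne     2f              @ mismatch -> fail
       strex   scratch, newval, [mem]
       cmp     scratch, #0
       bne     1b              @ store-exclusive lost the reservation
   2:  dmb                     @ full barrier

   leaving the flags so that EQ signals success.  */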
27719 void
27720 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27721 rtx value, rtx model_rtx, rtx cond)
27723 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27724 enum machine_mode mode = GET_MODE (mem);
27725 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
27726 rtx label, x;
27728 bool use_acquire = TARGET_HAVE_LDACQ
27729 && !(model == MEMMODEL_RELAXED
27730 || model == MEMMODEL_CONSUME
27731 || model == MEMMODEL_RELEASE);
27733 bool use_release = TARGET_HAVE_LDACQ
27734 && !(model == MEMMODEL_RELAXED
27735 || model == MEMMODEL_CONSUME
27736 || model == MEMMODEL_ACQUIRE);
27738 /* Checks whether a barrier is needed and emits one accordingly. */
27739 if (!(use_acquire || use_release))
27740 arm_pre_atomic_barrier (model);
27742 label = gen_label_rtx ();
27743 emit_label (label);
27745 if (new_out)
27746 new_out = gen_lowpart (wmode, new_out);
27747 if (old_out)
27748 old_out = gen_lowpart (wmode, old_out);
27749 else
27750 old_out = new_out;
27751 value = simplify_gen_subreg (wmode, value, mode, 0);
27753 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27755 switch (code)
27757 case SET:
27758 new_out = value;
27759 break;
27761 case NOT:
27762 x = gen_rtx_AND (wmode, old_out, value);
27763 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27764 x = gen_rtx_NOT (wmode, new_out);
27765 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27766 break;
27768 case MINUS:
27769 if (CONST_INT_P (value))
27771 value = GEN_INT (-INTVAL (value));
27772 code = PLUS;
27774 /* FALLTHRU */
27776 case PLUS:
27777 if (mode == DImode)
27779 /* DImode plus/minus need to clobber flags. */
27780 /* The adddi3 and subdi3 patterns are incorrectly written so that
27781 they require matching operands, even when we could easily support
27782 three operands. Thankfully, this can be fixed up post-splitting,
27783 as the individual add+adc patterns do accept three operands and
27784 post-reload cprop can make these moves go away. */
27785 emit_move_insn (new_out, old_out);
27786 if (code == PLUS)
27787 x = gen_adddi3 (new_out, new_out, value);
27788 else
27789 x = gen_subdi3 (new_out, new_out, value);
27790 emit_insn (x);
27791 break;
27793 /* FALLTHRU */
27795 default:
27796 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27797 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27798 break;
27801 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27802 use_release);
27804 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27805 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27807 /* Checks whether a barrier is needed and emits one accordingly. */
27808 if (!(use_acquire || use_release))
27809 arm_post_atomic_barrier (model);
27812 #define MAX_VECT_LEN 16
27814 struct expand_vec_perm_d
27816 rtx target, op0, op1;
27817 unsigned char perm[MAX_VECT_LEN];
27818 enum machine_mode vmode;
27819 unsigned char nelt;
27820 bool one_vector_p;
27821 bool testing_p;
27824 /* Generate a variable permutation. */
27826 static void
27827 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27829 enum machine_mode vmode = GET_MODE (target);
27830 bool one_vector_p = rtx_equal_p (op0, op1);
27832 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27833 gcc_checking_assert (GET_MODE (op0) == vmode);
27834 gcc_checking_assert (GET_MODE (op1) == vmode);
27835 gcc_checking_assert (GET_MODE (sel) == vmode);
27836 gcc_checking_assert (TARGET_NEON);
27838 if (one_vector_p)
27840 if (vmode == V8QImode)
27841 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27842 else
27843 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27845 else
27847 rtx pair;
27849 if (vmode == V8QImode)
27851 pair = gen_reg_rtx (V16QImode);
27852 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27853 pair = gen_lowpart (TImode, pair);
27854 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27856 else
27858 pair = gen_reg_rtx (OImode);
27859 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27860 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27865 void
27866 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27868 enum machine_mode vmode = GET_MODE (target);
27869 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27870 bool one_vector_p = rtx_equal_p (op0, op1);
27871 rtx rmask[MAX_VECT_LEN], mask;
27873 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27874 numbering of elements for big-endian, we must reverse the order. */
27875 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27877 /* The VTBL instruction does not use a modulo index, so we must take care
27878 of that ourselves. */
27879 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27880 for (i = 0; i < nelt; ++i)
27881 rmask[i] = mask;
27882 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27883 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27885 arm_expand_vec_perm_1 (target, op0, op1, sel);
27888 /* Generate or test for an insn that supports a constant permutation. */
27890 /* Recognize patterns for the VUZP insns. */
27892 static bool
27893 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27895 unsigned int i, odd, mask, nelt = d->nelt;
27896 rtx out0, out1, in0, in1, x;
27897 rtx (*gen)(rtx, rtx, rtx, rtx);
27899 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27900 return false;
27902 /* Note that these are little-endian tests. Adjust for big-endian later. */
27903 if (d->perm[0] == 0)
27904 odd = 0;
27905 else if (d->perm[0] == 1)
27906 odd = 1;
27907 else
27908 return false;
27909 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27911 for (i = 0; i < nelt; i++)
27913 unsigned elt = (i * 2 + odd) & mask;
27914 if (d->perm[i] != elt)
27915 return false;
27918 /* Success! */
27919 if (d->testing_p)
27920 return true;
27922 switch (d->vmode)
27924 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27925 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27926 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27927 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27928 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27929 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27930 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27931 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27932 default:
27933 gcc_unreachable ();
27936 in0 = d->op0;
27937 in1 = d->op1;
27938 if (BYTES_BIG_ENDIAN)
27940 x = in0, in0 = in1, in1 = x;
27941 odd = !odd;
27944 out0 = d->target;
27945 out1 = gen_reg_rtx (d->vmode);
27946 if (odd)
27947 x = out0, out0 = out1, out1 = x;
27949 emit_insn (gen (out0, in0, in1, out1));
27950 return true;
27953 /* Recognize patterns for the VZIP insns. */
27955 static bool
27956 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27958 unsigned int i, high, mask, nelt = d->nelt;
27959 rtx out0, out1, in0, in1, x;
27960 rtx (*gen)(rtx, rtx, rtx, rtx);
27962 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27963 return false;
27965 /* Note that these are little-endian tests. Adjust for big-endian later. */
27966 high = nelt / 2;
27967 if (d->perm[0] == high)
27969 else if (d->perm[0] == 0)
27970 high = 0;
27971 else
27972 return false;
27973 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27975 for (i = 0; i < nelt / 2; i++)
27977 unsigned elt = (i + high) & mask;
27978 if (d->perm[i * 2] != elt)
27979 return false;
27980 elt = (elt + nelt) & mask;
27981 if (d->perm[i * 2 + 1] != elt)
27982 return false;
27985 /* Success! */
27986 if (d->testing_p)
27987 return true;
27989 switch (d->vmode)
27991 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27992 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27993 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27994 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27995 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27996 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27997 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27998 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27999 default:
28000 gcc_unreachable ();
28003 in0 = d->op0;
28004 in1 = d->op1;
28005 if (BYTES_BIG_ENDIAN)
28007 x = in0, in0 = in1, in1 = x;
28008 high = !high;
28011 out0 = d->target;
28012 out1 = gen_reg_rtx (d->vmode);
28013 if (high)
28014 x = out0, out0 = out1, out1 = x;
28016 emit_insn (gen (out0, in0, in1, out1));
28017 return true;
28020 /* Recognize patterns for the VREV insns. */
28022 static bool
28023 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28025 unsigned int i, j, diff, nelt = d->nelt;
28026 rtx (*gen)(rtx, rtx, rtx);
28028 if (!d->one_vector_p)
28029 return false;
28031 diff = d->perm[0];
28032 switch (diff)
28034 case 7:
28035 switch (d->vmode)
28037 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28038 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28039 default:
28040 return false;
28042 break;
28043 case 3:
28044 switch (d->vmode)
28046 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28047 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28048 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28049 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28050 default:
28051 return false;
28053 break;
28054 case 1:
28055 switch (d->vmode)
28057 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28058 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28059 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28060 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28061 case V4SImode: gen = gen_neon_vrev64v4si; break;
28062 case V2SImode: gen = gen_neon_vrev64v2si; break;
28063 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28064 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28065 default:
28066 return false;
28068 break;
28069 default:
28070 return false;
28073 for (i = 0; i < nelt ; i += diff + 1)
28074 for (j = 0; j <= diff; j += 1)
28076 /* This is guaranteed to be true as the value of diff
28077 is 7, 3, 1 and we should have enough elements in the
28078 queue to generate this. Getting a vector mask with a
28079 value of diff other than these values implies that
28080 something is wrong by the time we get here. */
28081 gcc_assert (i + j < nelt);
28082 if (d->perm[i + j] != i + diff - j)
28083 return false;
28086 /* Success! */
28087 if (d->testing_p)
28088 return true;
28090 /* ??? The third operand is an artifact of the builtin infrastructure
28091 and is ignored by the actual instruction. */
28092 emit_insn (gen (d->target, d->op0, const0_rtx));
28093 return true;
28096 /* Recognize patterns for the VTRN insns. */
28098 static bool
28099 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28101 unsigned int i, odd, mask, nelt = d->nelt;
28102 rtx out0, out1, in0, in1, x;
28103 rtx (*gen)(rtx, rtx, rtx, rtx);
28105 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28106 return false;
28108 /* Note that these are little-endian tests. Adjust for big-endian later. */
28109 if (d->perm[0] == 0)
28110 odd = 0;
28111 else if (d->perm[0] == 1)
28112 odd = 1;
28113 else
28114 return false;
28115 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28117 for (i = 0; i < nelt; i += 2)
28119 if (d->perm[i] != i + odd)
28120 return false;
28121 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28122 return false;
28125 /* Success! */
28126 if (d->testing_p)
28127 return true;
28129 switch (d->vmode)
28131 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28132 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28133 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28134 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28135 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28136 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28137 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28138 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28139 default:
28140 gcc_unreachable ();
28143 in0 = d->op0;
28144 in1 = d->op1;
28145 if (BYTES_BIG_ENDIAN)
28147 x = in0, in0 = in1, in1 = x;
28148 odd = !odd;
28151 out0 = d->target;
28152 out1 = gen_reg_rtx (d->vmode);
28153 if (odd)
28154 x = out0, out0 = out1, out1 = x;
28156 emit_insn (gen (out0, in0, in1, out1));
28157 return true;
28160 /* Recognize patterns for the VEXT insns. */
28162 static bool
28163 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28165 unsigned int i, nelt = d->nelt;
28166 rtx (*gen) (rtx, rtx, rtx, rtx);
28167 rtx offset;
28169 unsigned int location;
28171 unsigned int next = d->perm[0] + 1;
28173 /* TODO: Handle GCC's numbering of elements for big-endian. */
28174 if (BYTES_BIG_ENDIAN)
28175 return false;
28177 /* Check if the extracted indexes are increasing by one. */
28178 for (i = 1; i < nelt; next++, i++)
28180 /* If we hit the most significant element of the 2nd vector in
28181 the previous iteration, no need to test further. */
28182 if (next == 2 * nelt)
28183 return false;
28185 /* If we are operating on only one vector: it could be a
28186 rotation. If there are only two elements of size < 64, let
28187 arm_evpc_neon_vrev catch it. */
28188 if (d->one_vector_p && (next == nelt))
28190 if ((nelt == 2) && (d->vmode != V2DImode))
28191 return false;
28192 else
28193 next = 0;
28196 if (d->perm[i] != next)
28197 return false;
28200 location = d->perm[0];
28202 switch (d->vmode)
28204 case V16QImode: gen = gen_neon_vextv16qi; break;
28205 case V8QImode: gen = gen_neon_vextv8qi; break;
28206 case V4HImode: gen = gen_neon_vextv4hi; break;
28207 case V8HImode: gen = gen_neon_vextv8hi; break;
28208 case V2SImode: gen = gen_neon_vextv2si; break;
28209 case V4SImode: gen = gen_neon_vextv4si; break;
28210 case V2SFmode: gen = gen_neon_vextv2sf; break;
28211 case V4SFmode: gen = gen_neon_vextv4sf; break;
28212 case V2DImode: gen = gen_neon_vextv2di; break;
28213 default:
28214 return false;
28217 /* Success! */
28218 if (d->testing_p)
28219 return true;
28221 offset = GEN_INT (location);
28222 emit_insn (gen (d->target, d->op0, d->op1, offset));
28223 return true;
28226 /* The NEON VTBL instruction is a fully variable permutation that's even
28227 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28228 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28229 can do slightly better by expanding this as a constant where we don't
28230 have to apply a mask. */
28232 static bool
28233 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28235 rtx rperm[MAX_VECT_LEN], sel;
28236 enum machine_mode vmode = d->vmode;
28237 unsigned int i, nelt = d->nelt;
28239 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28240 numbering of elements for big-endian, we must reverse the order. */
28241 if (BYTES_BIG_ENDIAN)
28242 return false;
28244 if (d->testing_p)
28245 return true;
28247 /* Generic code will try constant permutation twice. Once with the
28248 original mode and again with the elements lowered to QImode.
28249 So wait and don't do the selector expansion ourselves. */
28250 if (vmode != V8QImode && vmode != V16QImode)
28251 return false;
28253 for (i = 0; i < nelt; ++i)
28254 rperm[i] = GEN_INT (d->perm[i]);
28255 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28256 sel = force_reg (vmode, sel);
28258 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28259 return true;
28262 static bool
28263 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28265 /* Check if the input mask matches vext before reordering the
28266 operands. */
28267 if (TARGET_NEON)
28268 if (arm_evpc_neon_vext (d))
28269 return true;
28271 /* The pattern matching functions above are written to look for a small
28272 number to begin the sequence (0, 1, N/2). If we begin with an index
28273 from the second operand, we can swap the operands. */
28274 if (d->perm[0] >= d->nelt)
28276 unsigned i, nelt = d->nelt;
28277 rtx x;
28279 for (i = 0; i < nelt; ++i)
28280 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28282 x = d->op0;
28283 d->op0 = d->op1;
28284 d->op1 = x;
28287 if (TARGET_NEON)
28289 if (arm_evpc_neon_vuzp (d))
28290 return true;
28291 if (arm_evpc_neon_vzip (d))
28292 return true;
28293 if (arm_evpc_neon_vrev (d))
28294 return true;
28295 if (arm_evpc_neon_vtrn (d))
28296 return true;
28297 return arm_evpc_neon_vtbl (d);
28299 return false;
28302 /* Expand a vec_perm_const pattern. */
28304 bool
28305 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28307 struct expand_vec_perm_d d;
28308 int i, nelt, which;
28310 d.target = target;
28311 d.op0 = op0;
28312 d.op1 = op1;
28314 d.vmode = GET_MODE (target);
28315 gcc_assert (VECTOR_MODE_P (d.vmode));
28316 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28317 d.testing_p = false;
28319 for (i = which = 0; i < nelt; ++i)
28321 rtx e = XVECEXP (sel, 0, i);
28322 int ei = INTVAL (e) & (2 * nelt - 1);
28323 which |= (ei < nelt ? 1 : 2);
28324 d.perm[i] = ei;
28327 switch (which)
28329 default:
28330 gcc_unreachable();
28332 case 3:
28333 d.one_vector_p = false;
28334 if (!rtx_equal_p (op0, op1))
28335 break;
28337 /* The elements of PERM do not suggest that only the first operand
28338 is used, but both operands are identical. Allow easier matching
28339 of the permutation by folding the permutation into the single
28340 input vector. */
28341 /* FALLTHRU */
28342 case 2:
28343 for (i = 0; i < nelt; ++i)
28344 d.perm[i] &= nelt - 1;
28345 d.op0 = op1;
28346 d.one_vector_p = true;
28347 break;
28349 case 1:
28350 d.op1 = op0;
28351 d.one_vector_p = true;
28352 break;
28355 return arm_expand_vec_perm_const_1 (&d);
28358 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28360 static bool
28361 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
28362 const unsigned char *sel)
28364 struct expand_vec_perm_d d;
28365 unsigned int i, nelt, which;
28366 bool ret;
28368 d.vmode = vmode;
28369 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28370 d.testing_p = true;
28371 memcpy (d.perm, sel, nelt);
28373 /* Categorize the set of elements in the selector. */
28374 for (i = which = 0; i < nelt; ++i)
28376 unsigned char e = d.perm[i];
28377 gcc_assert (e < 2 * nelt);
28378 which |= (e < nelt ? 1 : 2);
28381 /* For all elements from second vector, fold the elements to first. */
28382 if (which == 2)
28383 for (i = 0; i < nelt; ++i)
28384 d.perm[i] -= nelt;
28386 /* Check whether the mask can be applied to the vector type. */
28387 d.one_vector_p = (which != 3);
28389 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28390 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28391 if (!d.one_vector_p)
28392 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28394 start_sequence ();
28395 ret = arm_expand_vec_perm_const_1 (&d);
28396 end_sequence ();
28398 return ret;
28401 bool
28402 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
28404 /* If we are soft-float and either have ldrd or the mode is no wider
28405 than a word, then all auto-increment forms are ok. */
28406 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28407 return true;
28409 switch (code)
28411 /* Post increment and Pre Decrement are supported for all
28412 instruction forms except for vector forms. */
28413 case ARM_POST_INC:
28414 case ARM_PRE_DEC:
28415 if (VECTOR_MODE_P (mode))
28417 if (code != ARM_PRE_DEC)
28418 return true;
28419 else
28420 return false;
28423 return true;
28425 case ARM_POST_DEC:
28426 case ARM_PRE_INC:
28427 /* Without LDRD, and with a mode wider than a
28428 word, there is no point in auto-incrementing,
28429 because ldm and stm do not have these forms. */
28430 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28431 return false;
28433 /* Vector and floating point modes do not support
28434 these auto increment forms. */
28435 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28436 return false;
28438 return true;
28440 default:
28441 return false;
28445 return false;
28448 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28449 on ARM, since we know that shifts by negative amounts are no-ops.
28450 Additionally, the default expansion code is not available or suitable
28451 for post-reload insn splits (this can occur when the register allocator
28452 chooses not to do a shift in NEON).
28454 This function is used in both initial expand and post-reload splits, and
28455 handles all kinds of 64-bit shifts.
28457 Input requirements:
28458 - It is safe for the input and output to be the same register, but
28459 early-clobber rules apply for the shift amount and scratch registers.
28460 - Shift by register requires both scratch registers. In all other cases
28461 the scratch registers may be NULL.
28462 - Ashiftrt by a register also clobbers the CC register. */
28463 void
28464 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28465 rtx amount, rtx scratch1, rtx scratch2)
28467 rtx out_high = gen_highpart (SImode, out);
28468 rtx out_low = gen_lowpart (SImode, out);
28469 rtx in_high = gen_highpart (SImode, in);
28470 rtx in_low = gen_lowpart (SImode, in);
28472 /* Terminology:
28473 in = the register pair containing the input value.
28474 out = the destination register pair.
28475 up = the high- or low-part of each pair.
28476 down = the opposite part to "up".
28477 In a shift, we can consider bits to shift from "up"-stream to
28478 "down"-stream, so in a left-shift "up" is the low-part and "down"
28479 is the high-part of each register pair. */
28481 rtx out_up = code == ASHIFT ? out_low : out_high;
28482 rtx out_down = code == ASHIFT ? out_high : out_low;
28483 rtx in_up = code == ASHIFT ? in_low : in_high;
28484 rtx in_down = code == ASHIFT ? in_high : in_low;
28486 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28487 gcc_assert (out
28488 && (REG_P (out) || GET_CODE (out) == SUBREG)
28489 && GET_MODE (out) == DImode);
28490 gcc_assert (in
28491 && (REG_P (in) || GET_CODE (in) == SUBREG)
28492 && GET_MODE (in) == DImode);
28493 gcc_assert (amount
28494 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28495 && GET_MODE (amount) == SImode)
28496 || CONST_INT_P (amount)));
28497 gcc_assert (scratch1 == NULL
28498 || (GET_CODE (scratch1) == SCRATCH)
28499 || (GET_MODE (scratch1) == SImode
28500 && REG_P (scratch1)));
28501 gcc_assert (scratch2 == NULL
28502 || (GET_CODE (scratch2) == SCRATCH)
28503 || (GET_MODE (scratch2) == SImode
28504 && REG_P (scratch2)));
28505 gcc_assert (!REG_P (out) || !REG_P (amount)
28506 || !HARD_REGISTER_P (out)
28507 || (REGNO (out) != REGNO (amount)
28508 && REGNO (out) + 1 != REGNO (amount)));
28510 /* Macros to make following code more readable. */
28511 #define SUB_32(DEST,SRC) \
28512 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28513 #define RSB_32(DEST,SRC) \
28514 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28515 #define SUB_S_32(DEST,SRC) \
28516 gen_addsi3_compare0 ((DEST), (SRC), \
28517 GEN_INT (-32))
28518 #define SET(DEST,SRC) \
28519 gen_rtx_SET (SImode, (DEST), (SRC))
28520 #define SHIFT(CODE,SRC,AMOUNT) \
28521 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28522 #define LSHIFT(CODE,SRC,AMOUNT) \
28523 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28524 SImode, (SRC), (AMOUNT))
28525 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28526 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28527 SImode, (SRC), (AMOUNT))
28528 #define ORR(A,B) \
28529 gen_rtx_IOR (SImode, (A), (B))
28530 #define BRANCH(COND,LABEL) \
28531 gen_arm_cond_branch ((LABEL), \
28532 gen_rtx_ ## COND (CCmode, cc_reg, \
28533 const0_rtx), \
28534 cc_reg)
28536 /* Shifts by register and shifts by constant are handled separately. */
28537 if (CONST_INT_P (amount))
28539 /* We have a shift-by-constant. */
28541 /* First, handle out-of-range shift amounts.
28542 In both cases we try to match the result an ARM instruction in a
28543 shift-by-register would give. This helps reduce execution
28544 differences between optimization levels, but it won't stop other
28545 parts of the compiler doing different things. This is "undefined
28546 behaviour", in any case. */
28547 if (INTVAL (amount) <= 0)
28548 emit_insn (gen_movdi (out, in));
28549 else if (INTVAL (amount) >= 64)
28551 if (code == ASHIFTRT)
28553 rtx const31_rtx = GEN_INT (31);
28554 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28555 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28557 else
28558 emit_insn (gen_movdi (out, const0_rtx));
28561 /* Now handle valid shifts. */
28562 else if (INTVAL (amount) < 32)
28564 /* Shifts by a constant less than 32. */
28565 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28567 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28568 emit_insn (SET (out_down,
28569 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28570 out_down)));
28571 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28573 else
28575 /* Shifts by a constant greater than 31. */
28576 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28578 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28579 if (code == ASHIFTRT)
28580 emit_insn (gen_ashrsi3 (out_up, in_up,
28581 GEN_INT (31)));
28582 else
28583 emit_insn (SET (out_up, const0_rtx));
28586 else
28588 /* We have a shift-by-register. */
28589 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28591 /* This alternative requires the scratch registers. */
28592 gcc_assert (scratch1 && REG_P (scratch1));
28593 gcc_assert (scratch2 && REG_P (scratch2));
28595 /* We will need the values "amount-32" and "32-amount" later.
28596 Swapping them around now allows the later code to be more general. */
28597 switch (code)
28599 case ASHIFT:
28600 emit_insn (SUB_32 (scratch1, amount));
28601 emit_insn (RSB_32 (scratch2, amount));
28602 break;
28603 case ASHIFTRT:
28604 emit_insn (RSB_32 (scratch1, amount));
28605 /* Also set CC = amount > 32. */
28606 emit_insn (SUB_S_32 (scratch2, amount));
28607 break;
28608 case LSHIFTRT:
28609 emit_insn (RSB_32 (scratch1, amount));
28610 emit_insn (SUB_32 (scratch2, amount));
28611 break;
28612 default:
28613 gcc_unreachable ();
28616 /* Emit code like this:
28618 arithmetic-left:
28619 out_down = in_down << amount;
28620 out_down = (in_up << (amount - 32)) | out_down;
28621 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28622 out_up = in_up << amount;
28624 arithmetic-right:
28625 out_down = in_down >> amount;
28626 out_down = (in_up << (32 - amount)) | out_down;
28627 if (amount < 32)
28628 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28629 out_up = in_up << amount;
28631 logical-right:
28632 out_down = in_down >> amount;
28633 out_down = (in_up << (32 - amount)) | out_down;
28634 if (amount < 32)
28635 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28636 out_up = in_up << amount;
28638 The ARM and Thumb2 variants are the same but implemented slightly
28639 differently. If this were only called during expand we could just
28640 use the Thumb2 case and let combine do the right thing, but this
28641 can also be called from post-reload splitters. */
28643 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28645 if (!TARGET_THUMB2)
28647 /* Emit code for ARM mode. */
28648 emit_insn (SET (out_down,
28649 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28650 if (code == ASHIFTRT)
28652 rtx done_label = gen_label_rtx ();
28653 emit_jump_insn (BRANCH (LT, done_label));
28654 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28655 out_down)));
28656 emit_label (done_label);
28658 else
28659 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28660 out_down)));
28662 else
28664 /* Emit code for Thumb2 mode.
28665 Thumb2 can't do shift and or in one insn. */
28666 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28667 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28669 if (code == ASHIFTRT)
28671 rtx done_label = gen_label_rtx ();
28672 emit_jump_insn (BRANCH (LT, done_label));
28673 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28674 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28675 emit_label (done_label);
28677 else
28679 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28680 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28684 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28687 #undef SUB_32
28688 #undef RSB_32
28689 #undef SUB_S_32
28690 #undef SET
28691 #undef SHIFT
28692 #undef LSHIFT
28693 #undef REV_LSHIFT
28694 #undef ORR
28695 #undef BRANCH
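/* Editorial sketch: for a constant 64-bit logical right shift by 4 in ARM
   mode, the expansion above is expected to boil down to something like

       lsr     out_lo, in_lo, #4
       orr     out_lo, out_lo, in_hi, lsl #28
       lsr     out_hi, in_hi, #4

   i.e. the low word receives the bits shifted out of the high word, and
   the out-of-range cases (amount <= 0 or >= 64) are normalised to match
   what a shift-by-register would produce.  */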
28699 /* Returns true if this is a valid comparison operation, and forces
28700 the operands into a form that is valid for it. */
28701 bool
28702 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28704 enum rtx_code code = GET_CODE (*comparison);
28705 int code_int;
28706 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28707 ? GET_MODE (*op2) : GET_MODE (*op1);
28709 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28711 if (code == UNEQ || code == LTGT)
28712 return false;
28714 code_int = (int)code;
28715 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28716 PUT_CODE (*comparison, (enum rtx_code)code_int);
28718 switch (mode)
28720 case SImode:
28721 if (!arm_add_operand (*op1, mode))
28722 *op1 = force_reg (mode, *op1);
28723 if (!arm_add_operand (*op2, mode))
28724 *op2 = force_reg (mode, *op2);
28725 return true;
28727 case DImode:
28728 if (!cmpdi_operand (*op1, mode))
28729 *op1 = force_reg (mode, *op1);
28730 if (!cmpdi_operand (*op2, mode))
28731 *op2 = force_reg (mode, *op2);
28732 return true;
28734 case SFmode:
28735 case DFmode:
28736 if (!arm_float_compare_operand (*op1, mode))
28737 *op1 = force_reg (mode, *op1);
28738 if (!arm_float_compare_operand (*op2, mode))
28739 *op2 = force_reg (mode, *op2);
28740 return true;
28741 default:
28742 break;
28745 return false;
28749 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
28751 static unsigned HOST_WIDE_INT
28752 arm_asan_shadow_offset (void)
28754 return (unsigned HOST_WIDE_INT) 1 << 29;
28757 #include "gt-arm.h"