1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "predict.h"
55 #include "dominance.h"
56 #include "cfg.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "lcm.h"
60 #include "cfgbuild.h"
61 #include "cfgcleanup.h"
62 #include "basic-block.h"
63 #include "hash-map.h"
64 #include "is-a.h"
65 #include "plugin-api.h"
66 #include "ipa-ref.h"
67 #include "cgraph.h"
68 #include "ggc.h"
69 #include "except.h"
70 #include "tm_p.h"
71 #include "target.h"
72 #include "sched-int.h"
73 #include "target-def.h"
74 #include "debug.h"
75 #include "langhooks.h"
76 #include "df.h"
77 #include "intl.h"
78 #include "libfuncs.h"
79 #include "params.h"
80 #include "opts.h"
81 #include "dumpfile.h"
82 #include "gimple-expr.h"
83 #include "builtins.h"
84 #include "tm-constrs.h"
85 #include "rtl-iter.h"
87 /* Forward definitions of types. */
88 typedef struct minipool_node Mnode;
89 typedef struct minipool_fixup Mfix;
91 void (*arm_lang_output_object_attributes_hook)(void);
93 struct four_ints
94 {
95   int i[4];
96 };
98 /* Forward function declarations. */
99 static bool arm_const_not_ok_for_debug_p (rtx);
100 static bool arm_lra_p (void);
101 static bool arm_needs_doubleword_align (machine_mode, const_tree);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets *arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
106 HOST_WIDE_INT, rtx, rtx, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx, int);
109 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
110 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
111 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
112 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
113 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
114 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
115 inline static int thumb1_index_register_rtx_p (rtx, int);
116 static int thumb_far_jump_used_p (void);
117 static bool thumb_force_lr_save (void);
118 static unsigned arm_size_return_regs (void);
119 static bool arm_assemble_integer (rtx, unsigned int, int);
120 static void arm_print_operand (FILE *, rtx, int);
121 static void arm_print_operand_address (FILE *, rtx);
122 static bool arm_print_operand_punct_valid_p (unsigned char code);
123 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
124 static arm_cc get_arm_condition_code (rtx);
125 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
126 static const char *output_multi_immediate (rtx *, const char *, const char *,
127 int, HOST_WIDE_INT);
128 static const char *shift_op (rtx, HOST_WIDE_INT *);
129 static struct machine_function *arm_init_machine_status (void);
130 static void thumb_exit (FILE *, int);
131 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
132 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
133 static Mnode *add_minipool_forward_ref (Mfix *);
134 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
135 static Mnode *add_minipool_backward_ref (Mfix *);
136 static void assign_minipool_offsets (Mfix *);
137 static void arm_print_value (FILE *, rtx);
138 static void dump_minipool (rtx_insn *);
139 static int arm_barrier_cost (rtx);
140 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
141 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
142 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
143 machine_mode, rtx);
144 static void arm_reorg (void);
145 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
146 static unsigned long arm_compute_save_reg0_reg12_mask (void);
147 static unsigned long arm_compute_save_reg_mask (void);
148 static unsigned long arm_isr_value (tree);
149 static unsigned long arm_compute_func_type (void);
150 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
151 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
152 static tree arm_handle_isr_decl_attribute (tree *, tree, tree, int, bool *);
153 static tree arm_handle_isr_type_attribute (tree *, tree, tree, int, bool *);
154 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
155 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
156 #endif
157 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
158 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
159 static int arm_comp_type_attributes (const_tree, const_tree);
160 static void arm_set_default_type_attributes (tree);
161 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
162 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
163 static int optimal_immediate_sequence (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence);
166 static int optimal_immediate_sequence_1 (enum rtx_code code,
167 unsigned HOST_WIDE_INT val,
168 struct four_ints *return_sequence,
169 int i);
170 static int arm_get_strip_length (int);
171 static bool arm_function_ok_for_sibcall (tree, tree);
172 static machine_mode arm_promote_function_mode (const_tree,
173 machine_mode, int *,
174 const_tree, int);
175 static bool arm_return_in_memory (const_tree, const_tree);
176 static rtx arm_function_value (const_tree, const_tree, bool);
177 static rtx arm_libcall_value_1 (machine_mode);
178 static rtx arm_libcall_value (machine_mode, const_rtx);
179 static bool arm_function_value_regno_p (const unsigned int);
180 static void arm_internal_label (FILE *, const char *, unsigned long);
181 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
182 tree);
183 static bool arm_have_conditional_execution (void);
184 static bool arm_cannot_force_const_mem (machine_mode, rtx);
185 static bool arm_legitimate_constant_p (machine_mode, rtx);
186 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
187 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
188 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
192 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
193 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
194 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
195 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
196 static void emit_constant_insn (rtx cond, rtx pattern);
197 static rtx_insn *emit_set_insn (rtx, rtx);
198 static rtx emit_multi_reg_push (unsigned long, unsigned long);
199 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
200 tree, bool);
201 static rtx arm_function_arg (cumulative_args_t, machine_mode,
202 const_tree, bool);
203 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
204 const_tree, bool);
205 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
206 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
207 const_tree);
208 static rtx aapcs_libcall_value (machine_mode);
209 static int aapcs_select_return_coproc (const_tree, const_tree);
211 #ifdef OBJECT_FORMAT_ELF
212 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
213 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
214 #endif
215 #ifndef ARM_PE
216 static void arm_encode_section_info (tree, rtx, int);
217 #endif
219 static void arm_file_end (void);
220 static void arm_file_start (void);
222 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
223 tree, int *, int);
224 static bool arm_pass_by_reference (cumulative_args_t,
225 machine_mode, const_tree, bool);
226 static bool arm_promote_prototypes (const_tree);
227 static bool arm_default_short_enums (void);
228 static bool arm_align_anon_bitfield (void);
229 static bool arm_return_in_msb (const_tree);
230 static bool arm_must_pass_in_stack (machine_mode, const_tree);
231 static bool arm_return_in_memory (const_tree, const_tree);
232 #if ARM_UNWIND_INFO
233 static void arm_unwind_emit (FILE *, rtx_insn *);
234 static bool arm_output_ttype (rtx);
235 static void arm_asm_emit_except_personality (rtx);
236 static void arm_asm_init_sections (void);
237 #endif
238 static rtx arm_dwarf_register_span (rtx);
240 static tree arm_cxx_guard_type (void);
241 static bool arm_cxx_guard_mask_bit (void);
242 static tree arm_get_cookie_size (tree);
243 static bool arm_cookie_has_size (void);
244 static bool arm_cxx_cdtor_returns_this (void);
245 static bool arm_cxx_key_method_may_be_inline (void);
246 static void arm_cxx_determine_class_data_visibility (tree);
247 static bool arm_cxx_class_data_always_comdat (void);
248 static bool arm_cxx_use_aeabi_atexit (void);
249 static void arm_init_libfuncs (void);
250 static tree arm_build_builtin_va_list (void);
251 static void arm_expand_builtin_va_start (tree, rtx);
252 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
253 static void arm_option_override (void);
254 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
255 static bool arm_cannot_copy_insn_p (rtx_insn *);
256 static int arm_issue_rate (void);
257 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
258 static bool arm_output_addr_const_extra (FILE *, rtx);
259 static bool arm_allocate_stack_slots_for_args (void);
260 static bool arm_warn_func_return (tree);
261 static const char *arm_invalid_parameter_type (const_tree t);
262 static const char *arm_invalid_return_type (const_tree t);
263 static tree arm_promoted_type (const_tree t);
264 static tree arm_convert_to_type (tree type, tree expr);
265 static bool arm_scalar_mode_supported_p (machine_mode);
266 static bool arm_frame_pointer_required (void);
267 static bool arm_can_eliminate (const int, const int);
268 static void arm_asm_trampoline_template (FILE *);
269 static void arm_trampoline_init (rtx, tree, rtx);
270 static rtx arm_trampoline_adjust_address (rtx);
271 static rtx arm_pic_static_addr (rtx orig, rtx reg);
272 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
273 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
274 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
275 static bool arm_array_mode_supported_p (machine_mode,
276 unsigned HOST_WIDE_INT);
277 static machine_mode arm_preferred_simd_mode (machine_mode);
278 static bool arm_class_likely_spilled_p (reg_class_t);
279 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
280 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
281 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
282 const_tree type,
283 int misalignment,
284 bool is_packed);
285 static void arm_conditional_register_usage (void);
286 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
287 static unsigned int arm_autovectorize_vector_sizes (void);
288 static int arm_default_branch_cost (bool, bool);
289 static int arm_cortex_a5_branch_cost (bool, bool);
290 static int arm_cortex_m_branch_cost (bool, bool);
292 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
293 const unsigned char *sel);
295 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
296 tree vectype,
297 int misalign ATTRIBUTE_UNUSED);
298 static unsigned arm_add_stmt_cost (void *data, int count,
299 enum vect_cost_for_stmt kind,
300 struct _stmt_vec_info *stmt_info,
301 int misalign,
302 enum vect_cost_model_location where);
304 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
305 bool op0_preserve_value);
306 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
308 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
310 /* Table of machine attributes. */
311 static const struct attribute_spec arm_attribute_table[] =
312 {
313 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, decl_handler,
314      type_handler, affects_type_identity } */
315 /* Function calls made to this symbol must be done indirectly, because
316 it may lie outside of the 26 bit addressing range of a normal function
317 call. */
318 { "long_call", 0, 0, false, true, true, NULL, NULL, false },
319 /* Whereas these functions are always known to reside within the 26 bit
320 addressing range. */
321 { "short_call", 0, 0, false, true, true, NULL, NULL, false },
322 /* Specify the procedure call conventions for a function. */
323 { "pcs", 1, 1, false, true, true, NULL, arm_handle_pcs_attribute,
324 false },
325 /* Interrupt Service Routines have special prologue and epilogue requirements. */
326 { "isr", 0, 1, false, false, false, arm_handle_isr_decl_attribute,
327 arm_handle_isr_type_attribute, false },
328 { "interrupt", 0, 1, false, false, false, arm_handle_isr_decl_attribute,
329 arm_handle_isr_type_attribute,
330 false },
331 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
332 NULL, false },
333 #ifdef ARM_PE
334 /* ARM/PE has three new attributes:
335 interfacearm - ?
336 dllexport - for exporting a function/variable that will live in a dll
337 dllimport - for importing a function/variable from a dll
339 Microsoft allows multiple declspecs in one __declspec, separating
340 them with spaces. We do NOT support this. Instead, use __declspec
341 multiple times.
342 */
343 { "dllimport", 0, 0, true, false, false, NULL, NULL, false },
344 { "dllexport", 0, 0, true, false, false, NULL, NULL, false },
345 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
346 NULL, false },
347 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
348 { "dllimport", 0, 0, false, false, false, handle_dll_decl_attribute,
349 handle_dll_type_attribute, false },
350 { "dllexport", 0, 0, false, false, false, handle_dll_decl_attribute,
351 handle_dll_type_attribute, false },
352 { "notshared", 0, 0, false, true, false, NULL,
353 arm_handle_notshared_attribute, false },
354 #endif
355 { NULL, 0, 0, false, false, false, NULL, NULL, false }
356 };
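/* By way of illustration (a sketch, not taken from this file), user code
   applies the attributes registered above roughly like this; the attribute
   spellings are the ones documented for the ARM back end, while the
   function names are invented:

     extern int remote_helper (int) __attribute__ ((long_call));
     extern int nearby_helper (int) __attribute__ ((short_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     float vfp_callee (float) __attribute__ ((pcs ("aapcs-vfp")));

   The handler functions named in the table validate such uses and attach
   the attribute to the decl or type as appropriate.  */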
358 /* Initialize the GCC target structure. */
359 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
360 #undef TARGET_MERGE_DECL_ATTRIBUTES
361 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
362 #endif
364 #undef TARGET_LEGITIMIZE_ADDRESS
365 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
367 #undef TARGET_LRA_P
368 #define TARGET_LRA_P arm_lra_p
370 #undef TARGET_ATTRIBUTE_TABLE
371 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
373 #undef TARGET_ASM_FILE_START
374 #define TARGET_ASM_FILE_START arm_file_start
375 #undef TARGET_ASM_FILE_END
376 #define TARGET_ASM_FILE_END arm_file_end
378 #undef TARGET_ASM_ALIGNED_SI_OP
379 #define TARGET_ASM_ALIGNED_SI_OP NULL
380 #undef TARGET_ASM_INTEGER
381 #define TARGET_ASM_INTEGER arm_assemble_integer
383 #undef TARGET_PRINT_OPERAND
384 #define TARGET_PRINT_OPERAND arm_print_operand
385 #undef TARGET_PRINT_OPERAND_ADDRESS
386 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
387 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
388 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
390 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
391 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
393 #undef TARGET_ASM_FUNCTION_PROLOGUE
394 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
396 #undef TARGET_ASM_FUNCTION_EPILOGUE
397 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE arm_option_override
402 #undef TARGET_COMP_TYPE_ATTRIBUTES
403 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
405 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
406 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
408 #undef TARGET_SCHED_ADJUST_COST
409 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
411 #undef TARGET_SCHED_REORDER
412 #define TARGET_SCHED_REORDER arm_sched_reorder
414 #undef TARGET_REGISTER_MOVE_COST
415 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
417 #undef TARGET_MEMORY_MOVE_COST
418 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
420 #undef TARGET_ENCODE_SECTION_INFO
421 #ifdef ARM_PE
422 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
423 #else
424 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
425 #endif
427 #undef TARGET_STRIP_NAME_ENCODING
428 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
430 #undef TARGET_ASM_INTERNAL_LABEL
431 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
433 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
434 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
436 #undef TARGET_FUNCTION_VALUE
437 #define TARGET_FUNCTION_VALUE arm_function_value
439 #undef TARGET_LIBCALL_VALUE
440 #define TARGET_LIBCALL_VALUE arm_libcall_value
442 #undef TARGET_FUNCTION_VALUE_REGNO_P
443 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
445 #undef TARGET_ASM_OUTPUT_MI_THUNK
446 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
447 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
448 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
450 #undef TARGET_RTX_COSTS
451 #define TARGET_RTX_COSTS arm_rtx_costs
452 #undef TARGET_ADDRESS_COST
453 #define TARGET_ADDRESS_COST arm_address_cost
455 #undef TARGET_SHIFT_TRUNCATION_MASK
456 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
457 #undef TARGET_VECTOR_MODE_SUPPORTED_P
458 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
459 #undef TARGET_ARRAY_MODE_SUPPORTED_P
460 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
461 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
462 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
463 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
464 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
465 arm_autovectorize_vector_sizes
467 #undef TARGET_MACHINE_DEPENDENT_REORG
468 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
470 #undef TARGET_INIT_BUILTINS
471 #define TARGET_INIT_BUILTINS arm_init_builtins
472 #undef TARGET_EXPAND_BUILTIN
473 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
474 #undef TARGET_BUILTIN_DECL
475 #define TARGET_BUILTIN_DECL arm_builtin_decl
477 #undef TARGET_INIT_LIBFUNCS
478 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
480 #undef TARGET_PROMOTE_FUNCTION_MODE
481 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
482 #undef TARGET_PROMOTE_PROTOTYPES
483 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
484 #undef TARGET_PASS_BY_REFERENCE
485 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
486 #undef TARGET_ARG_PARTIAL_BYTES
487 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
488 #undef TARGET_FUNCTION_ARG
489 #define TARGET_FUNCTION_ARG arm_function_arg
490 #undef TARGET_FUNCTION_ARG_ADVANCE
491 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
492 #undef TARGET_FUNCTION_ARG_BOUNDARY
493 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
495 #undef TARGET_SETUP_INCOMING_VARARGS
496 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
498 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
499 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
501 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
502 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
503 #undef TARGET_TRAMPOLINE_INIT
504 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
505 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
506 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
508 #undef TARGET_WARN_FUNC_RETURN
509 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
511 #undef TARGET_DEFAULT_SHORT_ENUMS
512 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
514 #undef TARGET_ALIGN_ANON_BITFIELD
515 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
517 #undef TARGET_NARROW_VOLATILE_BITFIELD
518 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
520 #undef TARGET_CXX_GUARD_TYPE
521 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
523 #undef TARGET_CXX_GUARD_MASK_BIT
524 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
526 #undef TARGET_CXX_GET_COOKIE_SIZE
527 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
529 #undef TARGET_CXX_COOKIE_HAS_SIZE
530 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
532 #undef TARGET_CXX_CDTOR_RETURNS_THIS
533 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
535 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
536 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
538 #undef TARGET_CXX_USE_AEABI_ATEXIT
539 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
541 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
542 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
543 arm_cxx_determine_class_data_visibility
545 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
546 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
548 #undef TARGET_RETURN_IN_MSB
549 #define TARGET_RETURN_IN_MSB arm_return_in_msb
551 #undef TARGET_RETURN_IN_MEMORY
552 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
554 #undef TARGET_MUST_PASS_IN_STACK
555 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
557 #if ARM_UNWIND_INFO
558 #undef TARGET_ASM_UNWIND_EMIT
559 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
561 /* EABI unwinding tables use a different format for the typeinfo tables. */
562 #undef TARGET_ASM_TTYPE
563 #define TARGET_ASM_TTYPE arm_output_ttype
565 #undef TARGET_ARM_EABI_UNWINDER
566 #define TARGET_ARM_EABI_UNWINDER true
568 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
569 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
571 #undef TARGET_ASM_INIT_SECTIONS
572 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
573 #endif /* ARM_UNWIND_INFO */
575 #undef TARGET_DWARF_REGISTER_SPAN
576 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
578 #undef TARGET_CANNOT_COPY_INSN_P
579 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
581 #ifdef HAVE_AS_TLS
582 #undef TARGET_HAVE_TLS
583 #define TARGET_HAVE_TLS true
584 #endif
586 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
587 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
589 #undef TARGET_LEGITIMATE_CONSTANT_P
590 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
592 #undef TARGET_CANNOT_FORCE_CONST_MEM
593 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
595 #undef TARGET_MAX_ANCHOR_OFFSET
596 #define TARGET_MAX_ANCHOR_OFFSET 4095
598 /* The minimum is set such that the total size of the block
599 for a particular anchor is -4088 + 1 + 4095 bytes, which is
600 divisible by eight, ensuring natural spacing of anchors. */
601 #undef TARGET_MIN_ANCHOR_OFFSET
602 #define TARGET_MIN_ANCHOR_OFFSET -4088
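/* Spelling the arithmetic out: the anchor block spans offsets -4088
   through +4095, so its size is 4095 - (-4088) + 1 = 8184 bytes, and
   8184 = 8 * 1023, i.e. divisible by eight as the comment above says.  */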
604 #undef TARGET_SCHED_ISSUE_RATE
605 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
607 #undef TARGET_MANGLE_TYPE
608 #define TARGET_MANGLE_TYPE arm_mangle_type
610 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
611 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
613 #undef TARGET_BUILD_BUILTIN_VA_LIST
614 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
615 #undef TARGET_EXPAND_BUILTIN_VA_START
616 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
617 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
618 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
620 #ifdef HAVE_AS_TLS
621 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
622 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
623 #endif
625 #undef TARGET_LEGITIMATE_ADDRESS_P
626 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
628 #undef TARGET_PREFERRED_RELOAD_CLASS
629 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
631 #undef TARGET_INVALID_PARAMETER_TYPE
632 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
634 #undef TARGET_INVALID_RETURN_TYPE
635 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
637 #undef TARGET_PROMOTED_TYPE
638 #define TARGET_PROMOTED_TYPE arm_promoted_type
640 #undef TARGET_CONVERT_TO_TYPE
641 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
643 #undef TARGET_SCALAR_MODE_SUPPORTED_P
644 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
646 #undef TARGET_FRAME_POINTER_REQUIRED
647 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
649 #undef TARGET_CAN_ELIMINATE
650 #define TARGET_CAN_ELIMINATE arm_can_eliminate
652 #undef TARGET_CONDITIONAL_REGISTER_USAGE
653 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
655 #undef TARGET_CLASS_LIKELY_SPILLED_P
656 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
658 #undef TARGET_VECTORIZE_BUILTINS
659 #define TARGET_VECTORIZE_BUILTINS
661 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
662 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
663 arm_builtin_vectorized_function
665 #undef TARGET_VECTOR_ALIGNMENT
666 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
668 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
669 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
670 arm_vector_alignment_reachable
672 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
673 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
674 arm_builtin_support_vector_misalignment
676 #undef TARGET_PREFERRED_RENAME_CLASS
677 #define TARGET_PREFERRED_RENAME_CLASS \
678 arm_preferred_rename_class
680 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
681 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
682 arm_vectorize_vec_perm_const_ok
684 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
685 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
686 arm_builtin_vectorization_cost
687 #undef TARGET_VECTORIZE_ADD_STMT_COST
688 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
690 #undef TARGET_CANONICALIZE_COMPARISON
691 #define TARGET_CANONICALIZE_COMPARISON \
692 arm_canonicalize_comparison
694 #undef TARGET_ASAN_SHADOW_OFFSET
695 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
697 #undef MAX_INSN_PER_IT_BLOCK
698 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
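/* A Thumb-2 IT instruction can conditionalize up to four following
   instructions, hence the limit of 4 above.  With -mrestrict-it (the
   ARMv8-friendly mode) the compiler keeps to a single conditional
   instruction per IT block, so, for example, two conditional instructions
   that could otherwise share one "ITT" block are each given their own
   IT instruction instead.  */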
700 #undef TARGET_CAN_USE_DOLOOP_P
701 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
703 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
704 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
706 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
707 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
709 #undef TARGET_SCHED_FUSION_PRIORITY
710 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
712 struct gcc_target targetm = TARGET_INITIALIZER;
714 /* Obstack for minipool constant handling. */
715 static struct obstack minipool_obstack;
716 static char * minipool_startobj;
718 /* The maximum number of insns skipped which
719 will be conditionalised if possible. */
720 static int max_insns_skipped = 5;
722 extern FILE * asm_out_file;
724 /* True if we are currently building a constant table. */
725 int making_const_table;
727 /* The processor for which instructions should be scheduled. */
728 enum processor_type arm_tune = arm_none;
730 /* The current tuning set. */
731 const struct tune_params *current_tune;
733 /* Which floating point hardware to schedule for. */
734 int arm_fpu_attr;
736 /* Which floating point hardware to use. */
737 const struct arm_fpu_desc *arm_fpu_desc;
739 /* Used for Thumb call_via trampolines. */
740 rtx thumb_call_via_label[14];
741 static int thumb_call_reg_needed;
743 /* The bits in this mask specify which
744 instructions we are allowed to generate. */
745 unsigned long insn_flags = 0;
747 /* The bits in this mask specify which instruction scheduling options should
748 be used. */
749 unsigned long tune_flags = 0;
751 /* The highest ARM architecture version supported by the
752 target. */
753 enum base_architecture arm_base_arch = BASE_ARCH_0;
755 /* The following are used in the arm.md file as equivalents to bits
756 in the above two flag variables. */
758 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
759 int arm_arch3m = 0;
761 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
762 int arm_arch4 = 0;
764 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
765 int arm_arch4t = 0;
767 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
768 int arm_arch5 = 0;
770 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
771 int arm_arch5e = 0;
773 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
774 int arm_arch6 = 0;
776 /* Nonzero if this chip supports the ARM 6K extensions. */
777 int arm_arch6k = 0;
779 /* Nonzero if instructions present in ARMv6-M can be used. */
780 int arm_arch6m = 0;
782 /* Nonzero if this chip supports the ARM 7 extensions. */
783 int arm_arch7 = 0;
785 /* Nonzero if instructions not present in the 'M' profile can be used. */
786 int arm_arch_notm = 0;
788 /* Nonzero if instructions present in ARMv7E-M can be used. */
789 int arm_arch7em = 0;
791 /* Nonzero if instructions present in ARMv8 can be used. */
792 int arm_arch8 = 0;
794 /* Nonzero if this chip can benefit from load scheduling. */
795 int arm_ld_sched = 0;
797 /* Nonzero if this chip is a StrongARM. */
798 int arm_tune_strongarm = 0;
800 /* Nonzero if this chip supports Intel Wireless MMX technology. */
801 int arm_arch_iwmmxt = 0;
803 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
804 int arm_arch_iwmmxt2 = 0;
806 /* Nonzero if this chip is an XScale. */
807 int arm_arch_xscale = 0;
809 /* Nonzero if tuning for XScale */
810 int arm_tune_xscale = 0;
812 /* Nonzero if we want to tune for stores that access the write-buffer.
813 This typically means an ARM6 or ARM7 with MMU or MPU. */
814 int arm_tune_wbuf = 0;
816 /* Nonzero if tuning for Cortex-A9. */
817 int arm_tune_cortex_a9 = 0;
819 /* Nonzero if generating Thumb instructions. */
820 int thumb_code = 0;
822 /* Nonzero if generating Thumb-1 instructions. */
823 int thumb1_code = 0;
825 /* Nonzero if we should define __THUMB_INTERWORK__ in the
826 preprocessor.
827 XXX This is a bit of a hack, it's intended to help work around
828 problems in GLD which doesn't understand that armv5t code is
829 interworking clean. */
830 int arm_cpp_interwork = 0;
832 /* Nonzero if chip supports Thumb 2. */
833 int arm_arch_thumb2;
835 /* Nonzero if chip supports integer division instruction. */
836 int arm_arch_arm_hwdiv;
837 int arm_arch_thumb_hwdiv;
839 /* Nonzero if we should use Neon to handle 64-bit operations rather
840 than core registers. */
841 int prefer_neon_for_64bits = 0;
843 /* Nonzero if we shouldn't use literal pools. */
844 bool arm_disable_literal_pool = false;
846 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
847 we must report the mode of the memory reference from
848 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
849 machine_mode output_memory_reference_mode;
851 /* The register number to be used for the PIC offset register. */
852 unsigned arm_pic_register = INVALID_REGNUM;
854 enum arm_pcs arm_pcs_default;
856 /* For an explanation of these variables, see final_prescan_insn below. */
857 int arm_ccfsm_state;
858 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
859 enum arm_cond_code arm_current_cc;
861 rtx arm_target_insn;
862 int arm_target_label;
863 /* The number of conditionally executed insns, including the current insn. */
864 int arm_condexec_count = 0;
865 /* A bitmask specifying the patterns for the IT block.
866 Zero means do not output an IT block before this insn. */
867 int arm_condexec_mask = 0;
868 /* The number of bits used in arm_condexec_mask. */
869 int arm_condexec_masklen = 0;
871 /* Nonzero if chip supports the ARMv8 CRC instructions. */
872 int arm_arch_crc = 0;
874 /* Nonzero if the core has a very small, high-latency, multiply unit. */
875 int arm_m_profile_small_mul = 0;
877 /* The condition codes of the ARM, and the inverse function. */
878 static const char * const arm_condition_codes[] =
879 {
880   "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
881   "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
882 };
884 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
885 int arm_regs_in_sequence[] =
886 {
887   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
888 };
890 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
891 #define streq(string1, string2) (strcmp (string1, string2) == 0)
893 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
894 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
895 | (1 << PIC_OFFSET_TABLE_REGNUM)))
897 /* Initialization code. */
899 struct processors
900 {
901   const char *const name;
902   enum processor_type core;
903   const char *arch;
904   enum base_architecture base_arch;
905   const unsigned long flags;
906   const struct tune_params *const tune;
907 };
910 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
911 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
912 prefetch_slots, \
913 l1_size, \
914 l1_line_size
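/* For illustration only (the numbers here are invented, not a real CPU
   description): a tuning table entry written as
     ARM_PREFETCH_BENEFICIAL (4, 32768, 64)
   expands to the three comma-separated fields
     4, 32768, 64
   i.e. prefetch slots, L1 cache size and L1 line size, while
   ARM_PREFETCH_NOT_BENEFICIAL supplies 0, -1, -1.  */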
916 /* arm generic vectorizer costs. */
917 static const
918 struct cpu_vec_costs arm_default_vec_cost = {
919 1, /* scalar_stmt_cost. */
920 1, /* scalar load_cost. */
921 1, /* scalar_store_cost. */
922 1, /* vec_stmt_cost. */
923 1, /* vec_to_scalar_cost. */
924 1, /* scalar_to_vec_cost. */
925 1, /* vec_align_load_cost. */
926 1, /* vec_unalign_load_cost. */
927 1, /* vec_unalign_store_cost. */
928 1, /* vec_store_cost. */
929 3, /* cond_taken_branch_cost. */
930   1, /* cond_not_taken_branch_cost. */
931 };
933 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
934 #include "aarch-cost-tables.h"
938 const struct cpu_cost_table cortexa9_extra_costs =
940 /* ALU */
942 0, /* arith. */
943 0, /* logical. */
944 0, /* shift. */
945 COSTS_N_INSNS (1), /* shift_reg. */
946 COSTS_N_INSNS (1), /* arith_shift. */
947 COSTS_N_INSNS (2), /* arith_shift_reg. */
948 0, /* log_shift. */
949 COSTS_N_INSNS (1), /* log_shift_reg. */
950 COSTS_N_INSNS (1), /* extend. */
951 COSTS_N_INSNS (2), /* extend_arith. */
952 COSTS_N_INSNS (1), /* bfi. */
953 COSTS_N_INSNS (1), /* bfx. */
954 0, /* clz. */
955 0, /* rev. */
956 0, /* non_exec. */
957 true /* non_exec_costs_exec. */
960 /* MULT SImode */
962 COSTS_N_INSNS (3), /* simple. */
963 COSTS_N_INSNS (3), /* flag_setting. */
964 COSTS_N_INSNS (2), /* extend. */
965 COSTS_N_INSNS (3), /* add. */
966 COSTS_N_INSNS (2), /* extend_add. */
967 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
969 /* MULT DImode */
971 0, /* simple (N/A). */
972 0, /* flag_setting (N/A). */
973 COSTS_N_INSNS (4), /* extend. */
974 0, /* add (N/A). */
975 COSTS_N_INSNS (4), /* extend_add. */
976 0 /* idiv (N/A). */
979 /* LD/ST */
981 COSTS_N_INSNS (2), /* load. */
982 COSTS_N_INSNS (2), /* load_sign_extend. */
983 COSTS_N_INSNS (2), /* ldrd. */
984 COSTS_N_INSNS (2), /* ldm_1st. */
985 1, /* ldm_regs_per_insn_1st. */
986 2, /* ldm_regs_per_insn_subsequent. */
987 COSTS_N_INSNS (5), /* loadf. */
988 COSTS_N_INSNS (5), /* loadd. */
989 COSTS_N_INSNS (1), /* load_unaligned. */
990 COSTS_N_INSNS (2), /* store. */
991 COSTS_N_INSNS (2), /* strd. */
992 COSTS_N_INSNS (2), /* stm_1st. */
993 1, /* stm_regs_per_insn_1st. */
994 2, /* stm_regs_per_insn_subsequent. */
995 COSTS_N_INSNS (1), /* storef. */
996 COSTS_N_INSNS (1), /* stored. */
997 COSTS_N_INSNS (1) /* store_unaligned. */
1000 /* FP SFmode */
1002 COSTS_N_INSNS (14), /* div. */
1003 COSTS_N_INSNS (4), /* mult. */
1004 COSTS_N_INSNS (7), /* mult_addsub. */
1005 COSTS_N_INSNS (30), /* fma. */
1006 COSTS_N_INSNS (3), /* addsub. */
1007 COSTS_N_INSNS (1), /* fpconst. */
1008 COSTS_N_INSNS (1), /* neg. */
1009 COSTS_N_INSNS (3), /* compare. */
1010 COSTS_N_INSNS (3), /* widen. */
1011 COSTS_N_INSNS (3), /* narrow. */
1012 COSTS_N_INSNS (3), /* toint. */
1013 COSTS_N_INSNS (3), /* fromint. */
1014 COSTS_N_INSNS (3) /* roundint. */
1016 /* FP DFmode */
1018 COSTS_N_INSNS (24), /* div. */
1019 COSTS_N_INSNS (5), /* mult. */
1020 COSTS_N_INSNS (8), /* mult_addsub. */
1021 COSTS_N_INSNS (30), /* fma. */
1022 COSTS_N_INSNS (3), /* addsub. */
1023 COSTS_N_INSNS (1), /* fpconst. */
1024 COSTS_N_INSNS (1), /* neg. */
1025 COSTS_N_INSNS (3), /* compare. */
1026 COSTS_N_INSNS (3), /* widen. */
1027 COSTS_N_INSNS (3), /* narrow. */
1028 COSTS_N_INSNS (3), /* toint. */
1029 COSTS_N_INSNS (3), /* fromint. */
1030 COSTS_N_INSNS (3) /* roundint. */
1033 /* Vector */
1035 COSTS_N_INSNS (1) /* alu. */
1039 const struct cpu_cost_table cortexa8_extra_costs =
1041 /* ALU */
1043 0, /* arith. */
1044 0, /* logical. */
1045 COSTS_N_INSNS (1), /* shift. */
1046 0, /* shift_reg. */
1047 COSTS_N_INSNS (1), /* arith_shift. */
1048 0, /* arith_shift_reg. */
1049 COSTS_N_INSNS (1), /* log_shift. */
1050 0, /* log_shift_reg. */
1051 0, /* extend. */
1052 0, /* extend_arith. */
1053 0, /* bfi. */
1054 0, /* bfx. */
1055 0, /* clz. */
1056 0, /* rev. */
1057 0, /* non_exec. */
1058 true /* non_exec_costs_exec. */
1061 /* MULT SImode */
1063 COSTS_N_INSNS (1), /* simple. */
1064 COSTS_N_INSNS (1), /* flag_setting. */
1065 COSTS_N_INSNS (1), /* extend. */
1066 COSTS_N_INSNS (1), /* add. */
1067 COSTS_N_INSNS (1), /* extend_add. */
1068 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1070 /* MULT DImode */
1072 0, /* simple (N/A). */
1073 0, /* flag_setting (N/A). */
1074 COSTS_N_INSNS (2), /* extend. */
1075 0, /* add (N/A). */
1076 COSTS_N_INSNS (2), /* extend_add. */
1077 0 /* idiv (N/A). */
1080 /* LD/ST */
1082 COSTS_N_INSNS (1), /* load. */
1083 COSTS_N_INSNS (1), /* load_sign_extend. */
1084 COSTS_N_INSNS (1), /* ldrd. */
1085 COSTS_N_INSNS (1), /* ldm_1st. */
1086 1, /* ldm_regs_per_insn_1st. */
1087 2, /* ldm_regs_per_insn_subsequent. */
1088 COSTS_N_INSNS (1), /* loadf. */
1089 COSTS_N_INSNS (1), /* loadd. */
1090 COSTS_N_INSNS (1), /* load_unaligned. */
1091 COSTS_N_INSNS (1), /* store. */
1092 COSTS_N_INSNS (1), /* strd. */
1093 COSTS_N_INSNS (1), /* stm_1st. */
1094 1, /* stm_regs_per_insn_1st. */
1095 2, /* stm_regs_per_insn_subsequent. */
1096 COSTS_N_INSNS (1), /* storef. */
1097 COSTS_N_INSNS (1), /* stored. */
1098 COSTS_N_INSNS (1) /* store_unaligned. */
1101 /* FP SFmode */
1103 COSTS_N_INSNS (36), /* div. */
1104 COSTS_N_INSNS (11), /* mult. */
1105 COSTS_N_INSNS (20), /* mult_addsub. */
1106 COSTS_N_INSNS (30), /* fma. */
1107 COSTS_N_INSNS (9), /* addsub. */
1108 COSTS_N_INSNS (3), /* fpconst. */
1109 COSTS_N_INSNS (3), /* neg. */
1110 COSTS_N_INSNS (6), /* compare. */
1111 COSTS_N_INSNS (4), /* widen. */
1112 COSTS_N_INSNS (4), /* narrow. */
1113 COSTS_N_INSNS (8), /* toint. */
1114 COSTS_N_INSNS (8), /* fromint. */
1115 COSTS_N_INSNS (8) /* roundint. */
1117 /* FP DFmode */
1119 COSTS_N_INSNS (64), /* div. */
1120 COSTS_N_INSNS (16), /* mult. */
1121 COSTS_N_INSNS (25), /* mult_addsub. */
1122 COSTS_N_INSNS (30), /* fma. */
1123 COSTS_N_INSNS (9), /* addsub. */
1124 COSTS_N_INSNS (3), /* fpconst. */
1125 COSTS_N_INSNS (3), /* neg. */
1126 COSTS_N_INSNS (6), /* compare. */
1127 COSTS_N_INSNS (6), /* widen. */
1128 COSTS_N_INSNS (6), /* narrow. */
1129 COSTS_N_INSNS (8), /* toint. */
1130 COSTS_N_INSNS (8), /* fromint. */
1131 COSTS_N_INSNS (8) /* roundint. */
1134 /* Vector */
1136 COSTS_N_INSNS (1) /* alu. */
1140 const struct cpu_cost_table cortexa5_extra_costs =
1142 /* ALU */
1144 0, /* arith. */
1145 0, /* logical. */
1146 COSTS_N_INSNS (1), /* shift. */
1147 COSTS_N_INSNS (1), /* shift_reg. */
1148 COSTS_N_INSNS (1), /* arith_shift. */
1149 COSTS_N_INSNS (1), /* arith_shift_reg. */
1150 COSTS_N_INSNS (1), /* log_shift. */
1151 COSTS_N_INSNS (1), /* log_shift_reg. */
1152 COSTS_N_INSNS (1), /* extend. */
1153 COSTS_N_INSNS (1), /* extend_arith. */
1154 COSTS_N_INSNS (1), /* bfi. */
1155 COSTS_N_INSNS (1), /* bfx. */
1156 COSTS_N_INSNS (1), /* clz. */
1157 COSTS_N_INSNS (1), /* rev. */
1158 0, /* non_exec. */
1159 true /* non_exec_costs_exec. */
1163 /* MULT SImode */
1165 0, /* simple. */
1166 COSTS_N_INSNS (1), /* flag_setting. */
1167 COSTS_N_INSNS (1), /* extend. */
1168 COSTS_N_INSNS (1), /* add. */
1169 COSTS_N_INSNS (1), /* extend_add. */
1170 COSTS_N_INSNS (7) /* idiv. */
1172 /* MULT DImode */
1174 0, /* simple (N/A). */
1175 0, /* flag_setting (N/A). */
1176 COSTS_N_INSNS (1), /* extend. */
1177 0, /* add. */
1178 COSTS_N_INSNS (2), /* extend_add. */
1179 0 /* idiv (N/A). */
1182 /* LD/ST */
1184 COSTS_N_INSNS (1), /* load. */
1185 COSTS_N_INSNS (1), /* load_sign_extend. */
1186 COSTS_N_INSNS (6), /* ldrd. */
1187 COSTS_N_INSNS (1), /* ldm_1st. */
1188 1, /* ldm_regs_per_insn_1st. */
1189 2, /* ldm_regs_per_insn_subsequent. */
1190 COSTS_N_INSNS (2), /* loadf. */
1191 COSTS_N_INSNS (4), /* loadd. */
1192 COSTS_N_INSNS (1), /* load_unaligned. */
1193 COSTS_N_INSNS (1), /* store. */
1194 COSTS_N_INSNS (3), /* strd. */
1195 COSTS_N_INSNS (1), /* stm_1st. */
1196 1, /* stm_regs_per_insn_1st. */
1197 2, /* stm_regs_per_insn_subsequent. */
1198 COSTS_N_INSNS (2), /* storef. */
1199 COSTS_N_INSNS (2), /* stored. */
1200 COSTS_N_INSNS (1) /* store_unaligned. */
1203 /* FP SFmode */
1205 COSTS_N_INSNS (15), /* div. */
1206 COSTS_N_INSNS (3), /* mult. */
1207 COSTS_N_INSNS (7), /* mult_addsub. */
1208 COSTS_N_INSNS (7), /* fma. */
1209 COSTS_N_INSNS (3), /* addsub. */
1210 COSTS_N_INSNS (3), /* fpconst. */
1211 COSTS_N_INSNS (3), /* neg. */
1212 COSTS_N_INSNS (3), /* compare. */
1213 COSTS_N_INSNS (3), /* widen. */
1214 COSTS_N_INSNS (3), /* narrow. */
1215 COSTS_N_INSNS (3), /* toint. */
1216 COSTS_N_INSNS (3), /* fromint. */
1217 COSTS_N_INSNS (3) /* roundint. */
1219 /* FP DFmode */
1221 COSTS_N_INSNS (30), /* div. */
1222 COSTS_N_INSNS (6), /* mult. */
1223 COSTS_N_INSNS (10), /* mult_addsub. */
1224 COSTS_N_INSNS (7), /* fma. */
1225 COSTS_N_INSNS (3), /* addsub. */
1226 COSTS_N_INSNS (3), /* fpconst. */
1227 COSTS_N_INSNS (3), /* neg. */
1228 COSTS_N_INSNS (3), /* compare. */
1229 COSTS_N_INSNS (3), /* widen. */
1230 COSTS_N_INSNS (3), /* narrow. */
1231 COSTS_N_INSNS (3), /* toint. */
1232 COSTS_N_INSNS (3), /* fromint. */
1233 COSTS_N_INSNS (3) /* roundint. */
1236 /* Vector */
1238 COSTS_N_INSNS (1) /* alu. */
1243 const struct cpu_cost_table cortexa7_extra_costs =
1245 /* ALU */
1247 0, /* arith. */
1248 0, /* logical. */
1249 COSTS_N_INSNS (1), /* shift. */
1250 COSTS_N_INSNS (1), /* shift_reg. */
1251 COSTS_N_INSNS (1), /* arith_shift. */
1252 COSTS_N_INSNS (1), /* arith_shift_reg. */
1253 COSTS_N_INSNS (1), /* log_shift. */
1254 COSTS_N_INSNS (1), /* log_shift_reg. */
1255 COSTS_N_INSNS (1), /* extend. */
1256 COSTS_N_INSNS (1), /* extend_arith. */
1257 COSTS_N_INSNS (1), /* bfi. */
1258 COSTS_N_INSNS (1), /* bfx. */
1259 COSTS_N_INSNS (1), /* clz. */
1260 COSTS_N_INSNS (1), /* rev. */
1261 0, /* non_exec. */
1262 true /* non_exec_costs_exec. */
1266 /* MULT SImode */
1268 0, /* simple. */
1269 COSTS_N_INSNS (1), /* flag_setting. */
1270 COSTS_N_INSNS (1), /* extend. */
1271 COSTS_N_INSNS (1), /* add. */
1272 COSTS_N_INSNS (1), /* extend_add. */
1273 COSTS_N_INSNS (7) /* idiv. */
1275 /* MULT DImode */
1277 0, /* simple (N/A). */
1278 0, /* flag_setting (N/A). */
1279 COSTS_N_INSNS (1), /* extend. */
1280 0, /* add. */
1281 COSTS_N_INSNS (2), /* extend_add. */
1282 0 /* idiv (N/A). */
1285 /* LD/ST */
1287 COSTS_N_INSNS (1), /* load. */
1288 COSTS_N_INSNS (1), /* load_sign_extend. */
1289 COSTS_N_INSNS (3), /* ldrd. */
1290 COSTS_N_INSNS (1), /* ldm_1st. */
1291 1, /* ldm_regs_per_insn_1st. */
1292 2, /* ldm_regs_per_insn_subsequent. */
1293 COSTS_N_INSNS (2), /* loadf. */
1294 COSTS_N_INSNS (2), /* loadd. */
1295 COSTS_N_INSNS (1), /* load_unaligned. */
1296 COSTS_N_INSNS (1), /* store. */
1297 COSTS_N_INSNS (3), /* strd. */
1298 COSTS_N_INSNS (1), /* stm_1st. */
1299 1, /* stm_regs_per_insn_1st. */
1300 2, /* stm_regs_per_insn_subsequent. */
1301 COSTS_N_INSNS (2), /* storef. */
1302 COSTS_N_INSNS (2), /* stored. */
1303 COSTS_N_INSNS (1) /* store_unaligned. */
1306 /* FP SFmode */
1308 COSTS_N_INSNS (15), /* div. */
1309 COSTS_N_INSNS (3), /* mult. */
1310 COSTS_N_INSNS (7), /* mult_addsub. */
1311 COSTS_N_INSNS (7), /* fma. */
1312 COSTS_N_INSNS (3), /* addsub. */
1313 COSTS_N_INSNS (3), /* fpconst. */
1314 COSTS_N_INSNS (3), /* neg. */
1315 COSTS_N_INSNS (3), /* compare. */
1316 COSTS_N_INSNS (3), /* widen. */
1317 COSTS_N_INSNS (3), /* narrow. */
1318 COSTS_N_INSNS (3), /* toint. */
1319 COSTS_N_INSNS (3), /* fromint. */
1320 COSTS_N_INSNS (3) /* roundint. */
1322 /* FP DFmode */
1324 COSTS_N_INSNS (30), /* div. */
1325 COSTS_N_INSNS (6), /* mult. */
1326 COSTS_N_INSNS (10), /* mult_addsub. */
1327 COSTS_N_INSNS (7), /* fma. */
1328 COSTS_N_INSNS (3), /* addsub. */
1329 COSTS_N_INSNS (3), /* fpconst. */
1330 COSTS_N_INSNS (3), /* neg. */
1331 COSTS_N_INSNS (3), /* compare. */
1332 COSTS_N_INSNS (3), /* widen. */
1333 COSTS_N_INSNS (3), /* narrow. */
1334 COSTS_N_INSNS (3), /* toint. */
1335 COSTS_N_INSNS (3), /* fromint. */
1336 COSTS_N_INSNS (3) /* roundint. */
1339 /* Vector */
1341 COSTS_N_INSNS (1) /* alu. */
1345 const struct cpu_cost_table cortexa12_extra_costs =
1347 /* ALU */
1349 0, /* arith. */
1350 0, /* logical. */
1351 0, /* shift. */
1352 COSTS_N_INSNS (1), /* shift_reg. */
1353 COSTS_N_INSNS (1), /* arith_shift. */
1354 COSTS_N_INSNS (1), /* arith_shift_reg. */
1355 COSTS_N_INSNS (1), /* log_shift. */
1356 COSTS_N_INSNS (1), /* log_shift_reg. */
1357 0, /* extend. */
1358 COSTS_N_INSNS (1), /* extend_arith. */
1359 0, /* bfi. */
1360 COSTS_N_INSNS (1), /* bfx. */
1361 COSTS_N_INSNS (1), /* clz. */
1362 COSTS_N_INSNS (1), /* rev. */
1363 0, /* non_exec. */
1364 true /* non_exec_costs_exec. */
1366 /* MULT SImode */
1369 COSTS_N_INSNS (2), /* simple. */
1370 COSTS_N_INSNS (3), /* flag_setting. */
1371 COSTS_N_INSNS (2), /* extend. */
1372 COSTS_N_INSNS (3), /* add. */
1373 COSTS_N_INSNS (2), /* extend_add. */
1374 COSTS_N_INSNS (18) /* idiv. */
1376 /* MULT DImode */
1378 0, /* simple (N/A). */
1379 0, /* flag_setting (N/A). */
1380 COSTS_N_INSNS (3), /* extend. */
1381 0, /* add (N/A). */
1382 COSTS_N_INSNS (3), /* extend_add. */
1383 0 /* idiv (N/A). */
1386 /* LD/ST */
1388 COSTS_N_INSNS (3), /* load. */
1389 COSTS_N_INSNS (3), /* load_sign_extend. */
1390 COSTS_N_INSNS (3), /* ldrd. */
1391 COSTS_N_INSNS (3), /* ldm_1st. */
1392 1, /* ldm_regs_per_insn_1st. */
1393 2, /* ldm_regs_per_insn_subsequent. */
1394 COSTS_N_INSNS (3), /* loadf. */
1395 COSTS_N_INSNS (3), /* loadd. */
1396 0, /* load_unaligned. */
1397 0, /* store. */
1398 0, /* strd. */
1399 0, /* stm_1st. */
1400 1, /* stm_regs_per_insn_1st. */
1401 2, /* stm_regs_per_insn_subsequent. */
1402 COSTS_N_INSNS (2), /* storef. */
1403 COSTS_N_INSNS (2), /* stored. */
1404 0 /* store_unaligned. */
1407 /* FP SFmode */
1409 COSTS_N_INSNS (17), /* div. */
1410 COSTS_N_INSNS (4), /* mult. */
1411 COSTS_N_INSNS (8), /* mult_addsub. */
1412 COSTS_N_INSNS (8), /* fma. */
1413 COSTS_N_INSNS (4), /* addsub. */
1414 COSTS_N_INSNS (2), /* fpconst. */
1415 COSTS_N_INSNS (2), /* neg. */
1416 COSTS_N_INSNS (2), /* compare. */
1417 COSTS_N_INSNS (4), /* widen. */
1418 COSTS_N_INSNS (4), /* narrow. */
1419 COSTS_N_INSNS (4), /* toint. */
1420 COSTS_N_INSNS (4), /* fromint. */
1421 COSTS_N_INSNS (4) /* roundint. */
1423 /* FP DFmode */
1425 COSTS_N_INSNS (31), /* div. */
1426 COSTS_N_INSNS (4), /* mult. */
1427 COSTS_N_INSNS (8), /* mult_addsub. */
1428 COSTS_N_INSNS (8), /* fma. */
1429 COSTS_N_INSNS (4), /* addsub. */
1430 COSTS_N_INSNS (2), /* fpconst. */
1431 COSTS_N_INSNS (2), /* neg. */
1432 COSTS_N_INSNS (2), /* compare. */
1433 COSTS_N_INSNS (4), /* widen. */
1434 COSTS_N_INSNS (4), /* narrow. */
1435 COSTS_N_INSNS (4), /* toint. */
1436 COSTS_N_INSNS (4), /* fromint. */
1437 COSTS_N_INSNS (4) /* roundint. */
1440 /* Vector */
1442 COSTS_N_INSNS (1) /* alu. */
1446 const struct cpu_cost_table cortexa15_extra_costs =
1448 /* ALU */
1450 0, /* arith. */
1451 0, /* logical. */
1452 0, /* shift. */
1453 0, /* shift_reg. */
1454 COSTS_N_INSNS (1), /* arith_shift. */
1455 COSTS_N_INSNS (1), /* arith_shift_reg. */
1456 COSTS_N_INSNS (1), /* log_shift. */
1457 COSTS_N_INSNS (1), /* log_shift_reg. */
1458 0, /* extend. */
1459 COSTS_N_INSNS (1), /* extend_arith. */
1460 COSTS_N_INSNS (1), /* bfi. */
1461 0, /* bfx. */
1462 0, /* clz. */
1463 0, /* rev. */
1464 0, /* non_exec. */
1465 true /* non_exec_costs_exec. */
1467 /* MULT SImode */
1470 COSTS_N_INSNS (2), /* simple. */
1471 COSTS_N_INSNS (3), /* flag_setting. */
1472 COSTS_N_INSNS (2), /* extend. */
1473 COSTS_N_INSNS (2), /* add. */
1474 COSTS_N_INSNS (2), /* extend_add. */
1475 COSTS_N_INSNS (18) /* idiv. */
1477 /* MULT DImode */
1479 0, /* simple (N/A). */
1480 0, /* flag_setting (N/A). */
1481 COSTS_N_INSNS (3), /* extend. */
1482 0, /* add (N/A). */
1483 COSTS_N_INSNS (3), /* extend_add. */
1484 0 /* idiv (N/A). */
1487 /* LD/ST */
1489 COSTS_N_INSNS (3), /* load. */
1490 COSTS_N_INSNS (3), /* load_sign_extend. */
1491 COSTS_N_INSNS (3), /* ldrd. */
1492 COSTS_N_INSNS (4), /* ldm_1st. */
1493 1, /* ldm_regs_per_insn_1st. */
1494 2, /* ldm_regs_per_insn_subsequent. */
1495 COSTS_N_INSNS (4), /* loadf. */
1496 COSTS_N_INSNS (4), /* loadd. */
1497 0, /* load_unaligned. */
1498 0, /* store. */
1499 0, /* strd. */
1500 COSTS_N_INSNS (1), /* stm_1st. */
1501 1, /* stm_regs_per_insn_1st. */
1502 2, /* stm_regs_per_insn_subsequent. */
1503 0, /* storef. */
1504 0, /* stored. */
1505 0 /* store_unaligned. */
1508 /* FP SFmode */
1510 COSTS_N_INSNS (17), /* div. */
1511 COSTS_N_INSNS (4), /* mult. */
1512 COSTS_N_INSNS (8), /* mult_addsub. */
1513 COSTS_N_INSNS (8), /* fma. */
1514 COSTS_N_INSNS (4), /* addsub. */
1515 COSTS_N_INSNS (2), /* fpconst. */
1516 COSTS_N_INSNS (2), /* neg. */
1517 COSTS_N_INSNS (5), /* compare. */
1518 COSTS_N_INSNS (4), /* widen. */
1519 COSTS_N_INSNS (4), /* narrow. */
1520 COSTS_N_INSNS (4), /* toint. */
1521 COSTS_N_INSNS (4), /* fromint. */
1522 COSTS_N_INSNS (4) /* roundint. */
1524 /* FP DFmode */
1526 COSTS_N_INSNS (31), /* div. */
1527 COSTS_N_INSNS (4), /* mult. */
1528 COSTS_N_INSNS (8), /* mult_addsub. */
1529 COSTS_N_INSNS (8), /* fma. */
1530 COSTS_N_INSNS (4), /* addsub. */
1531 COSTS_N_INSNS (2), /* fpconst. */
1532 COSTS_N_INSNS (2), /* neg. */
1533 COSTS_N_INSNS (2), /* compare. */
1534 COSTS_N_INSNS (4), /* widen. */
1535 COSTS_N_INSNS (4), /* narrow. */
1536 COSTS_N_INSNS (4), /* toint. */
1537 COSTS_N_INSNS (4), /* fromint. */
1538 COSTS_N_INSNS (4) /* roundint. */
1541 /* Vector */
1543 COSTS_N_INSNS (1) /* alu. */
1547 const struct cpu_cost_table v7m_extra_costs =
1549 /* ALU */
1551 0, /* arith. */
1552 0, /* logical. */
1553 0, /* shift. */
1554 0, /* shift_reg. */
1555 0, /* arith_shift. */
1556 COSTS_N_INSNS (1), /* arith_shift_reg. */
1557 0, /* log_shift. */
1558 COSTS_N_INSNS (1), /* log_shift_reg. */
1559 0, /* extend. */
1560 COSTS_N_INSNS (1), /* extend_arith. */
1561 0, /* bfi. */
1562 0, /* bfx. */
1563 0, /* clz. */
1564 0, /* rev. */
1565 COSTS_N_INSNS (1), /* non_exec. */
1566 false /* non_exec_costs_exec. */
1569 /* MULT SImode */
1571 COSTS_N_INSNS (1), /* simple. */
1572 COSTS_N_INSNS (1), /* flag_setting. */
1573 COSTS_N_INSNS (2), /* extend. */
1574 COSTS_N_INSNS (1), /* add. */
1575 COSTS_N_INSNS (3), /* extend_add. */
1576 COSTS_N_INSNS (8) /* idiv. */
1578 /* MULT DImode */
1580 0, /* simple (N/A). */
1581 0, /* flag_setting (N/A). */
1582 COSTS_N_INSNS (2), /* extend. */
1583 0, /* add (N/A). */
1584 COSTS_N_INSNS (3), /* extend_add. */
1585 0 /* idiv (N/A). */
1588 /* LD/ST */
1590 COSTS_N_INSNS (2), /* load. */
1591 0, /* load_sign_extend. */
1592 COSTS_N_INSNS (3), /* ldrd. */
1593 COSTS_N_INSNS (2), /* ldm_1st. */
1594 1, /* ldm_regs_per_insn_1st. */
1595 1, /* ldm_regs_per_insn_subsequent. */
1596 COSTS_N_INSNS (2), /* loadf. */
1597 COSTS_N_INSNS (3), /* loadd. */
1598 COSTS_N_INSNS (1), /* load_unaligned. */
1599 COSTS_N_INSNS (2), /* store. */
1600 COSTS_N_INSNS (3), /* strd. */
1601 COSTS_N_INSNS (2), /* stm_1st. */
1602 1, /* stm_regs_per_insn_1st. */
1603 1, /* stm_regs_per_insn_subsequent. */
1604 COSTS_N_INSNS (2), /* storef. */
1605 COSTS_N_INSNS (3), /* stored. */
1606 COSTS_N_INSNS (1) /* store_unaligned. */
1609 /* FP SFmode */
1611 COSTS_N_INSNS (7), /* div. */
1612 COSTS_N_INSNS (2), /* mult. */
1613 COSTS_N_INSNS (5), /* mult_addsub. */
1614 COSTS_N_INSNS (3), /* fma. */
1615 COSTS_N_INSNS (1), /* addsub. */
1616 0, /* fpconst. */
1617 0, /* neg. */
1618 0, /* compare. */
1619 0, /* widen. */
1620 0, /* narrow. */
1621 0, /* toint. */
1622 0, /* fromint. */
1623 0 /* roundint. */
1625 /* FP DFmode */
1627 COSTS_N_INSNS (15), /* div. */
1628 COSTS_N_INSNS (5), /* mult. */
1629 COSTS_N_INSNS (7), /* mult_addsub. */
1630 COSTS_N_INSNS (7), /* fma. */
1631 COSTS_N_INSNS (3), /* addsub. */
1632 0, /* fpconst. */
1633 0, /* neg. */
1634 0, /* compare. */
1635 0, /* widen. */
1636 0, /* narrow. */
1637 0, /* toint. */
1638 0, /* fromint. */
1639 0 /* roundint. */
1642 /* Vector */
1644 COSTS_N_INSNS (1) /* alu. */
1648 const struct tune_params arm_slowmul_tune =
1650 arm_slowmul_rtx_costs,
1651 NULL,
1652 NULL, /* Sched adj cost. */
1653 3, /* Constant limit. */
1654 5, /* Max cond insns. */
1655 ARM_PREFETCH_NOT_BENEFICIAL,
1656 true, /* Prefer constant pool. */
1657 arm_default_branch_cost,
1658 false, /* Prefer LDRD/STRD. */
1659 {true, true}, /* Prefer non short circuit. */
1660 &arm_default_vec_cost, /* Vectorizer costs. */
1661 false, /* Prefer Neon for 64-bits bitops. */
1662 false, false, /* Prefer 32-bit encodings. */
1663 false, /* Prefer Neon for stringops. */
1664 8 /* Maximum insns to inline memset. */
1667 const struct tune_params arm_fastmul_tune =
1669 arm_fastmul_rtx_costs,
1670 NULL,
1671 NULL, /* Sched adj cost. */
1672 1, /* Constant limit. */
1673 5, /* Max cond insns. */
1674 ARM_PREFETCH_NOT_BENEFICIAL,
1675 true, /* Prefer constant pool. */
1676 arm_default_branch_cost,
1677 false, /* Prefer LDRD/STRD. */
1678 {true, true}, /* Prefer non short circuit. */
1679 &arm_default_vec_cost, /* Vectorizer costs. */
1680 false, /* Prefer Neon for 64-bits bitops. */
1681 false, false, /* Prefer 32-bit encodings. */
1682 false, /* Prefer Neon for stringops. */
1683 8 /* Maximum insns to inline memset. */
1686 /* StrongARM has early execution of branches, so a sequence that is worth
1687 skipping is shorter. Set max_insns_skipped to a lower value. */
1689 const struct tune_params arm_strongarm_tune =
1691 arm_fastmul_rtx_costs,
1692 NULL,
1693 NULL, /* Sched adj cost. */
1694 1, /* Constant limit. */
1695 3, /* Max cond insns. */
1696 ARM_PREFETCH_NOT_BENEFICIAL,
1697 true, /* Prefer constant pool. */
1698 arm_default_branch_cost,
1699 false, /* Prefer LDRD/STRD. */
1700 {true, true}, /* Prefer non short circuit. */
1701 &arm_default_vec_cost, /* Vectorizer costs. */
1702 false, /* Prefer Neon for 64-bits bitops. */
1703 false, false, /* Prefer 32-bit encodings. */
1704 false, /* Prefer Neon for stringops. */
1705 8 /* Maximum insns to inline memset. */
1708 const struct tune_params arm_xscale_tune =
1710 arm_xscale_rtx_costs,
1711 NULL,
1712 xscale_sched_adjust_cost,
1713 2, /* Constant limit. */
1714 3, /* Max cond insns. */
1715 ARM_PREFETCH_NOT_BENEFICIAL,
1716 true, /* Prefer constant pool. */
1717 arm_default_branch_cost,
1718 false, /* Prefer LDRD/STRD. */
1719 {true, true}, /* Prefer non short circuit. */
1720 &arm_default_vec_cost, /* Vectorizer costs. */
1721 false, /* Prefer Neon for 64-bits bitops. */
1722 false, false, /* Prefer 32-bit encodings. */
1723 false, /* Prefer Neon for stringops. */
1724 8 /* Maximum insns to inline memset. */
1727 const struct tune_params arm_9e_tune =
1729 arm_9e_rtx_costs,
1730 NULL,
1731 NULL, /* Sched adj cost. */
1732 1, /* Constant limit. */
1733 5, /* Max cond insns. */
1734 ARM_PREFETCH_NOT_BENEFICIAL,
1735 true, /* Prefer constant pool. */
1736 arm_default_branch_cost,
1737 false, /* Prefer LDRD/STRD. */
1738 {true, true}, /* Prefer non short circuit. */
1739 &arm_default_vec_cost, /* Vectorizer costs. */
1740 false, /* Prefer Neon for 64-bits bitops. */
1741 false, false, /* Prefer 32-bit encodings. */
1742 false, /* Prefer Neon for stringops. */
1743 8 /* Maximum insns to inline memset. */
1746 const struct tune_params arm_v6t2_tune =
1748 arm_9e_rtx_costs,
1749 NULL,
1750 NULL, /* Sched adj cost. */
1751 1, /* Constant limit. */
1752 5, /* Max cond insns. */
1753 ARM_PREFETCH_NOT_BENEFICIAL,
1754 false, /* Prefer constant pool. */
1755 arm_default_branch_cost,
1756 false, /* Prefer LDRD/STRD. */
1757 {true, true}, /* Prefer non short circuit. */
1758 &arm_default_vec_cost, /* Vectorizer costs. */
1759 false, /* Prefer Neon for 64-bits bitops. */
1760 false, false, /* Prefer 32-bit encodings. */
1761 false, /* Prefer Neon for stringops. */
1762 8 /* Maximum insns to inline memset. */
1765 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1766 const struct tune_params arm_cortex_tune =
1768 arm_9e_rtx_costs,
1769 &generic_extra_costs,
1770 NULL, /* Sched adj cost. */
1771 1, /* Constant limit. */
1772 5, /* Max cond insns. */
1773 ARM_PREFETCH_NOT_BENEFICIAL,
1774 false, /* Prefer constant pool. */
1775 arm_default_branch_cost,
1776 false, /* Prefer LDRD/STRD. */
1777 {true, true}, /* Prefer non short circuit. */
1778 &arm_default_vec_cost, /* Vectorizer costs. */
1779 false, /* Prefer Neon for 64-bits bitops. */
1780 false, false, /* Prefer 32-bit encodings. */
1781 false, /* Prefer Neon for stringops. */
1782 8 /* Maximum insns to inline memset. */
1785 const struct tune_params arm_cortex_a8_tune =
1787 arm_9e_rtx_costs,
1788 &cortexa8_extra_costs,
1789 NULL, /* Sched adj cost. */
1790 1, /* Constant limit. */
1791 5, /* Max cond insns. */
1792 ARM_PREFETCH_NOT_BENEFICIAL,
1793 false, /* Prefer constant pool. */
1794 arm_default_branch_cost,
1795 false, /* Prefer LDRD/STRD. */
1796 {true, true}, /* Prefer non short circuit. */
1797 &arm_default_vec_cost, /* Vectorizer costs. */
1798 false, /* Prefer Neon for 64-bits bitops. */
1799 false, false, /* Prefer 32-bit encodings. */
1800 true, /* Prefer Neon for stringops. */
1801 8 /* Maximum insns to inline memset. */
1804 const struct tune_params arm_cortex_a7_tune =
1806 arm_9e_rtx_costs,
1807 &cortexa7_extra_costs,
1808 NULL,
1809 1, /* Constant limit. */
1810 5, /* Max cond insns. */
1811 ARM_PREFETCH_NOT_BENEFICIAL,
1812 false, /* Prefer constant pool. */
1813 arm_default_branch_cost,
1814 false, /* Prefer LDRD/STRD. */
1815 {true, true}, /* Prefer non short circuit. */
1816 &arm_default_vec_cost, /* Vectorizer costs. */
1817 false, /* Prefer Neon for 64-bits bitops. */
1818 false, false, /* Prefer 32-bit encodings. */
1819 true, /* Prefer Neon for stringops. */
1820 8 /* Maximum insns to inline memset. */
1823 const struct tune_params arm_cortex_a15_tune =
1825 arm_9e_rtx_costs,
1826 &cortexa15_extra_costs,
1827 NULL, /* Sched adj cost. */
1828 1, /* Constant limit. */
1829 2, /* Max cond insns. */
1830 ARM_PREFETCH_NOT_BENEFICIAL,
1831 false, /* Prefer constant pool. */
1832 arm_default_branch_cost,
1833 true, /* Prefer LDRD/STRD. */
1834 {true, true}, /* Prefer non short circuit. */
1835 &arm_default_vec_cost, /* Vectorizer costs. */
1836 false, /* Prefer Neon for 64-bits bitops. */
1837 true, true, /* Prefer 32-bit encodings. */
1838 true, /* Prefer Neon for stringops. */
1839 8 /* Maximum insns to inline memset. */
1842 const struct tune_params arm_cortex_a53_tune =
1844 arm_9e_rtx_costs,
1845 &cortexa53_extra_costs,
1846 NULL, /* Scheduler cost adjustment. */
1847 1, /* Constant limit. */
1848 5, /* Max cond insns. */
1849 ARM_PREFETCH_NOT_BENEFICIAL,
1850 false, /* Prefer constant pool. */
1851 arm_default_branch_cost,
1852 false, /* Prefer LDRD/STRD. */
1853 {true, true}, /* Prefer non short circuit. */
1854 &arm_default_vec_cost, /* Vectorizer costs. */
1855 false, /* Prefer Neon for 64-bits bitops. */
1856 false, false, /* Prefer 32-bit encodings. */
1857 false, /* Prefer Neon for stringops. */
1858 8 /* Maximum insns to inline memset. */
1861 const struct tune_params arm_cortex_a57_tune =
1863 arm_9e_rtx_costs,
1864 &cortexa57_extra_costs,
1865 NULL, /* Scheduler cost adjustment. */
1866 1, /* Constant limit. */
1867 2, /* Max cond insns. */
1868 ARM_PREFETCH_NOT_BENEFICIAL,
1869 false, /* Prefer constant pool. */
1870 arm_default_branch_cost,
1871 true, /* Prefer LDRD/STRD. */
1872 {true, true}, /* Prefer non short circuit. */
1873 &arm_default_vec_cost, /* Vectorizer costs. */
1874 false, /* Prefer Neon for 64-bits bitops. */
1875 true, true, /* Prefer 32-bit encodings. */
1876 false, /* Prefer Neon for stringops. */
1877 8 /* Maximum insns to inline memset. */
1880 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1881 less appealing. Set max_insns_skipped to a low value. */
1883 const struct tune_params arm_cortex_a5_tune =
1885 arm_9e_rtx_costs,
1886 &cortexa5_extra_costs,
1887 NULL, /* Sched adj cost. */
1888 1, /* Constant limit. */
1889 1, /* Max cond insns. */
1890 ARM_PREFETCH_NOT_BENEFICIAL,
1891 false, /* Prefer constant pool. */
1892 arm_cortex_a5_branch_cost,
1893 false, /* Prefer LDRD/STRD. */
1894 {false, false}, /* Prefer non short circuit. */
1895 &arm_default_vec_cost, /* Vectorizer costs. */
1896 false, /* Prefer Neon for 64-bits bitops. */
1897 false, false, /* Prefer 32-bit encodings. */
1898 true, /* Prefer Neon for stringops. */
1899 8 /* Maximum insns to inline memset. */
1902 const struct tune_params arm_cortex_a9_tune =
1904 arm_9e_rtx_costs,
1905 &cortexa9_extra_costs,
1906 cortex_a9_sched_adjust_cost,
1907 1, /* Constant limit. */
1908 5, /* Max cond insns. */
1909 ARM_PREFETCH_BENEFICIAL(4,32,32),
1910 false, /* Prefer constant pool. */
1911 arm_default_branch_cost,
1912 false, /* Prefer LDRD/STRD. */
1913 {true, true}, /* Prefer non short circuit. */
1914 &arm_default_vec_cost, /* Vectorizer costs. */
1915 false, /* Prefer Neon for 64-bits bitops. */
1916 false, false, /* Prefer 32-bit encodings. */
1917 false, /* Prefer Neon for stringops. */
1918 8 /* Maximum insns to inline memset. */
1921 const struct tune_params arm_cortex_a12_tune =
1923 arm_9e_rtx_costs,
1924 &cortexa12_extra_costs,
1925 NULL,
1926 1, /* Constant limit. */
1927 5, /* Max cond insns. */
1928 ARM_PREFETCH_BENEFICIAL(4,32,32),
1929 false, /* Prefer constant pool. */
1930 arm_default_branch_cost,
1931 true, /* Prefer LDRD/STRD. */
1932 {true, true}, /* Prefer non short circuit. */
1933 &arm_default_vec_cost, /* Vectorizer costs. */
1934 false, /* Prefer Neon for 64-bits bitops. */
1935 false, false, /* Prefer 32-bit encodings. */
1936 true, /* Prefer Neon for stringops. */
1937 8 /* Maximum insns to inline memset. */
1940 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT each take a single
1941 cycle to execute, so a MOVW/MOVT pair costs two cycles. An LDR from the constant
1942 pool likewise takes two cycles to execute, but mildly increases pipelining opportunity (consecutive
1943 loads/stores can be pipelined together, saving one cycle), and may also
1944 improve icache utilisation. Hence we prefer the constant pool for such
1945 processors. */
1947 const struct tune_params arm_v7m_tune =
1949 arm_9e_rtx_costs,
1950 &v7m_extra_costs,
1951 NULL, /* Sched adj cost. */
1952 1, /* Constant limit. */
1953 2, /* Max cond insns. */
1954 ARM_PREFETCH_NOT_BENEFICIAL,
1955 true, /* Prefer constant pool. */
1956 arm_cortex_m_branch_cost,
1957 false, /* Prefer LDRD/STRD. */
1958 {false, false}, /* Prefer non short circuit. */
1959 &arm_default_vec_cost, /* Vectorizer costs. */
1960 false, /* Prefer Neon for 64-bits bitops. */
1961 false, false, /* Prefer 32-bit encodings. */
1962 false, /* Prefer Neon for stringops. */
1963 8 /* Maximum insns to inline memset. */
1966 /* Cortex-M7 tuning. */
1968 const struct tune_params arm_cortex_m7_tune =
1970 arm_9e_rtx_costs,
1971 &v7m_extra_costs,
1972 NULL, /* Sched adj cost. */
1973 0, /* Constant limit. */
1974 0, /* Max cond insns. */
1975 ARM_PREFETCH_NOT_BENEFICIAL,
1976 true, /* Prefer constant pool. */
1977 arm_cortex_m_branch_cost,
1978 false, /* Prefer LDRD/STRD. */
1979 {true, true}, /* Prefer non short circuit. */
1980 &arm_default_vec_cost, /* Vectorizer costs. */
1981 false, /* Prefer Neon for 64-bits bitops. */
1982 false, false, /* Prefer 32-bit encodings. */
1983 false, /* Prefer Neon for stringops. */
1984 8 /* Maximum insns to inline memset. */
1987 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1988 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1989 const struct tune_params arm_v6m_tune =
1991 arm_9e_rtx_costs,
1992 NULL,
1993 NULL, /* Sched adj cost. */
1994 1, /* Constant limit. */
1995 5, /* Max cond insns. */
1996 ARM_PREFETCH_NOT_BENEFICIAL,
1997 false, /* Prefer constant pool. */
1998 arm_default_branch_cost,
1999 false, /* Prefer LDRD/STRD. */
2000 {false, false}, /* Prefer non short circuit. */
2001 &arm_default_vec_cost, /* Vectorizer costs. */
2002 false, /* Prefer Neon for 64-bits bitops. */
2003 false, false, /* Prefer 32-bit encodings. */
2004 false, /* Prefer Neon for stringops. */
2005 8 /* Maximum insns to inline memset. */
2008 const struct tune_params arm_fa726te_tune =
2010 arm_9e_rtx_costs,
2011 NULL,
2012 fa726te_sched_adjust_cost,
2013 1, /* Constant limit. */
2014 5, /* Max cond insns. */
2015 ARM_PREFETCH_NOT_BENEFICIAL,
2016 true, /* Prefer constant pool. */
2017 arm_default_branch_cost,
2018 false, /* Prefer LDRD/STRD. */
2019 {true, true}, /* Prefer non short circuit. */
2020 &arm_default_vec_cost, /* Vectorizer costs. */
2021 false, /* Prefer Neon for 64-bits bitops. */
2022 false, false, /* Prefer 32-bit encodings. */
2023 false, /* Prefer Neon for stringops. */
2024 8 /* Maximum insns to inline memset. */
2028 /* Not all of these give usefully different compilation alternatives,
2029 but there is no simple way of generalizing them. */
2030 static const struct processors all_cores[] =
2032 /* ARM Cores */
2033 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2034 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2035 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2036 #include "arm-cores.def"
2037 #undef ARM_CORE
2038 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
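/* For illustration only: a hypothetical arm-cores.def entry such as
   ARM_CORE ("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9)
   would expand, via the macro above, to
   {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
   FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},
   tying each core name to its base architecture, feature flags and
   tuning table.  */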
2041 static const struct processors all_architectures[] =
2043 /* ARM Architectures */
2044 /* We don't specify tuning costs here as it will be figured out
2045 from the core. */
2047 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2048 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2049 #include "arm-arches.def"
2050 #undef ARM_ARCH
2051 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2055 /* These are populated as command-line arguments are processed, or NULL
2056 if not specified. */
2057 static const struct processors *arm_selected_arch;
2058 static const struct processors *arm_selected_cpu;
2059 static const struct processors *arm_selected_tune;
2061 /* The name of the preprocessor macro to define for this architecture. */
2063 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2065 /* Available values for -mfpu=. */
2067 static const struct arm_fpu_desc all_fpus[] =
2069 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2070 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2071 #include "arm-fpus.def"
2072 #undef ARM_FPU
2076 /* Supported TLS relocations. */
2078 enum tls_reloc {
2079 TLS_GD32,
2080 TLS_LDM32,
2081 TLS_LDO32,
2082 TLS_IE32,
2083 TLS_LE32,
2084 TLS_DESCSEQ /* GNU scheme */
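/* These correspond to the standard ELF TLS access models: global dynamic
   (TLS_GD32), local dynamic (TLS_LDM32/TLS_LDO32), initial exec (TLS_IE32),
   local exec (TLS_LE32), and the GNU TLS descriptor sequence (TLS_DESCSEQ).  */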
2087 /* The maximum number of insns to be used when loading a constant. */
2088 inline static int
2089 arm_constant_limit (bool size_p)
2091 return size_p ? 1 : current_tune->constant_limit;
2094 /* Emit an insn that's a simple single-set. Both the operands must be known
2095 to be valid. */
2096 inline static rtx_insn *
2097 emit_set_insn (rtx x, rtx y)
2099 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2102 /* Return the number of bits set in VALUE. */
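/* For example, bit_count (0x58) steps 0x58 -> 0x50 -> 0x40 -> 0 and returns 3;
   the loop below runs once per set bit rather than once per bit position.  */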
2103 static unsigned
2104 bit_count (unsigned long value)
2106 unsigned long count = 0;
2108 while (value)
2110 count++;
2111 value &= value - 1; /* Clear the least-significant set bit. */
2114 return count;
2117 typedef struct
2119 machine_mode mode;
2120 const char *name;
2121 } arm_fixed_mode_set;
2123 /* A small helper for setting fixed-point library libfuncs. */
2125 static void
2126 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2127 const char *funcname, const char *modename,
2128 int num_suffix)
2130 char buffer[50];
2132 if (num_suffix == 0)
2133 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2134 else
2135 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2137 set_optab_libfunc (optable, mode, buffer);
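/* For example, arm_set_fixed_optab_libfunc (ssadd_optab, SAmode, "ssadd",
   "sa", 3), as used in arm_init_libfuncs below, registers "__gnu_ssaddsa3"
   as the saturating SAmode addition helper.  */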
2140 static void
2141 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2142 machine_mode from, const char *funcname,
2143 const char *toname, const char *fromname)
2145 char buffer[50];
2146 const char *maybe_suffix_2 = "";
2148 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2149 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2150 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2151 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2152 maybe_suffix_2 = "2";
2154 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2155 maybe_suffix_2);
2157 set_conv_libfunc (optable, to, from, buffer);
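/* Likewise, a "fract" conversion from SQmode to DQmode (both signed fract
   modes) is named "__gnu_fractsqdq2", while SImode to SQmode gives
   "__gnu_fractsisq" with no "2" suffix, since SImode is not a fixed-point
   mode.  */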
2160 /* Set up library functions unique to ARM. */
2162 static void
2163 arm_init_libfuncs (void)
2165 /* For Linux, we have access to kernel support for atomic operations. */
2166 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2167 init_sync_libfuncs (2 * UNITS_PER_WORD);
2169 /* There are no special library functions unless we are using the
2170 ARM BPABI. */
2171 if (!TARGET_BPABI)
2172 return;
2174 /* The functions below are described in Section 4 of the "Run-Time
2175 ABI for the ARM architecture", Version 1.0. */
2177 /* Double-precision floating-point arithmetic. Table 2. */
2178 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2179 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2180 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2181 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2182 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2184 /* Double-precision comparisons. Table 3. */
2185 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2186 set_optab_libfunc (ne_optab, DFmode, NULL);
2187 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2188 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2189 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2190 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2191 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2193 /* Single-precision floating-point arithmetic. Table 4. */
2194 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2195 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2196 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2197 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2198 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2200 /* Single-precision comparisons. Table 5. */
2201 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2202 set_optab_libfunc (ne_optab, SFmode, NULL);
2203 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2204 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2205 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2206 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2207 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2209 /* Floating-point to integer conversions. Table 6. */
2210 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2211 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2212 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2213 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2214 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2215 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2216 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2217 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2219 /* Conversions between floating types. Table 7. */
2220 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2221 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2223 /* Integer to floating-point conversions. Table 8. */
2224 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2225 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2226 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2227 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2228 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2229 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2230 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2231 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2233 /* Long long. Table 9. */
2234 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2235 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2236 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2237 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2238 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2239 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2240 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2241 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2243 /* Integer (32/32->32) division. \S 4.3.1. */
2244 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2245 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2247 /* The divmod functions are designed so that they can be used for
2248 plain division, even though they return both the quotient and the
2249 remainder. The quotient is returned in the usual location (i.e.,
2250 r0 for SImode, {r0, r1} for DImode), just as would be expected
2251 for an ordinary division routine. Because the AAPCS calling
2252 conventions specify that all of { r0, r1, r2, r3 } are
2253 call-clobbered registers, there is no need to tell the compiler
2254 explicitly that those registers are clobbered by these
2255 routines. */
2256 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2257 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2259 /* For SImode division the ABI provides div-without-mod routines,
2260 which are faster. */
2261 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2262 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2264 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2265 divmod libcalls instead. */
2266 set_optab_libfunc (smod_optab, DImode, NULL);
2267 set_optab_libfunc (umod_optab, DImode, NULL);
2268 set_optab_libfunc (smod_optab, SImode, NULL);
2269 set_optab_libfunc (umod_optab, SImode, NULL);
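/* Thus, on cores without a hardware divide instruction, an expression such
   as "r = a % b" is typically expanded as a call to __aeabi_idivmod, which
   returns the quotient in r0 and the remainder in r1 (the DImode
   __aeabi_ldivmod variant uses {r0, r1} and {r2, r3}); the remainder is then
   simply read from the second result register(s).  */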
2271 /* Half-precision float operations. The compiler handles all operations
2272 with NULL libfuncs by converting to SFmode. */
2273 switch (arm_fp16_format)
2275 case ARM_FP16_FORMAT_IEEE:
2276 case ARM_FP16_FORMAT_ALTERNATIVE:
2278 /* Conversions. */
2279 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2280 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2281 ? "__gnu_f2h_ieee"
2282 : "__gnu_f2h_alternative"));
2283 set_conv_libfunc (sext_optab, SFmode, HFmode,
2284 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2285 ? "__gnu_h2f_ieee"
2286 : "__gnu_h2f_alternative"));
2288 /* Arithmetic. */
2289 set_optab_libfunc (add_optab, HFmode, NULL);
2290 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2291 set_optab_libfunc (smul_optab, HFmode, NULL);
2292 set_optab_libfunc (neg_optab, HFmode, NULL);
2293 set_optab_libfunc (sub_optab, HFmode, NULL);
2295 /* Comparisons. */
2296 set_optab_libfunc (eq_optab, HFmode, NULL);
2297 set_optab_libfunc (ne_optab, HFmode, NULL);
2298 set_optab_libfunc (lt_optab, HFmode, NULL);
2299 set_optab_libfunc (le_optab, HFmode, NULL);
2300 set_optab_libfunc (ge_optab, HFmode, NULL);
2301 set_optab_libfunc (gt_optab, HFmode, NULL);
2302 set_optab_libfunc (unord_optab, HFmode, NULL);
2303 break;
2305 default:
2306 break;
2309 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2311 const arm_fixed_mode_set fixed_arith_modes[] =
2313 { QQmode, "qq" },
2314 { UQQmode, "uqq" },
2315 { HQmode, "hq" },
2316 { UHQmode, "uhq" },
2317 { SQmode, "sq" },
2318 { USQmode, "usq" },
2319 { DQmode, "dq" },
2320 { UDQmode, "udq" },
2321 { TQmode, "tq" },
2322 { UTQmode, "utq" },
2323 { HAmode, "ha" },
2324 { UHAmode, "uha" },
2325 { SAmode, "sa" },
2326 { USAmode, "usa" },
2327 { DAmode, "da" },
2328 { UDAmode, "uda" },
2329 { TAmode, "ta" },
2330 { UTAmode, "uta" }
2332 const arm_fixed_mode_set fixed_conv_modes[] =
2334 { QQmode, "qq" },
2335 { UQQmode, "uqq" },
2336 { HQmode, "hq" },
2337 { UHQmode, "uhq" },
2338 { SQmode, "sq" },
2339 { USQmode, "usq" },
2340 { DQmode, "dq" },
2341 { UDQmode, "udq" },
2342 { TQmode, "tq" },
2343 { UTQmode, "utq" },
2344 { HAmode, "ha" },
2345 { UHAmode, "uha" },
2346 { SAmode, "sa" },
2347 { USAmode, "usa" },
2348 { DAmode, "da" },
2349 { UDAmode, "uda" },
2350 { TAmode, "ta" },
2351 { UTAmode, "uta" },
2352 { QImode, "qi" },
2353 { HImode, "hi" },
2354 { SImode, "si" },
2355 { DImode, "di" },
2356 { TImode, "ti" },
2357 { SFmode, "sf" },
2358 { DFmode, "df" }
2360 unsigned int i, j;
2362 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2364 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2365 "add", fixed_arith_modes[i].name, 3);
2366 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2367 "ssadd", fixed_arith_modes[i].name, 3);
2368 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2369 "usadd", fixed_arith_modes[i].name, 3);
2370 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2371 "sub", fixed_arith_modes[i].name, 3);
2372 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2373 "sssub", fixed_arith_modes[i].name, 3);
2374 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2375 "ussub", fixed_arith_modes[i].name, 3);
2376 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2377 "mul", fixed_arith_modes[i].name, 3);
2378 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2379 "ssmul", fixed_arith_modes[i].name, 3);
2380 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2381 "usmul", fixed_arith_modes[i].name, 3);
2382 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2383 "div", fixed_arith_modes[i].name, 3);
2384 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2385 "udiv", fixed_arith_modes[i].name, 3);
2386 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2387 "ssdiv", fixed_arith_modes[i].name, 3);
2388 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2389 "usdiv", fixed_arith_modes[i].name, 3);
2390 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2391 "neg", fixed_arith_modes[i].name, 2);
2392 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2393 "ssneg", fixed_arith_modes[i].name, 2);
2394 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2395 "usneg", fixed_arith_modes[i].name, 2);
2396 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2397 "ashl", fixed_arith_modes[i].name, 3);
2398 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2399 "ashr", fixed_arith_modes[i].name, 3);
2400 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2401 "lshr", fixed_arith_modes[i].name, 3);
2402 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2403 "ssashl", fixed_arith_modes[i].name, 3);
2404 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2405 "usashl", fixed_arith_modes[i].name, 3);
2406 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2407 "cmp", fixed_arith_modes[i].name, 2);
2410 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2411 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2413 if (i == j
2414 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2415 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2416 continue;
2418 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2419 fixed_conv_modes[j].mode, "fract",
2420 fixed_conv_modes[i].name,
2421 fixed_conv_modes[j].name);
2422 arm_set_fixed_conv_libfunc (satfract_optab,
2423 fixed_conv_modes[i].mode,
2424 fixed_conv_modes[j].mode, "satfract",
2425 fixed_conv_modes[i].name,
2426 fixed_conv_modes[j].name);
2427 arm_set_fixed_conv_libfunc (fractuns_optab,
2428 fixed_conv_modes[i].mode,
2429 fixed_conv_modes[j].mode, "fractuns",
2430 fixed_conv_modes[i].name,
2431 fixed_conv_modes[j].name);
2432 arm_set_fixed_conv_libfunc (satfractuns_optab,
2433 fixed_conv_modes[i].mode,
2434 fixed_conv_modes[j].mode, "satfractuns",
2435 fixed_conv_modes[i].name,
2436 fixed_conv_modes[j].name);
2440 if (TARGET_AAPCS_BASED)
2441 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2444 /* On AAPCS systems, this is the "struct __va_list". */
2445 static GTY(()) tree va_list_type;
2447 /* Return the type to use as __builtin_va_list. */
2448 static tree
2449 arm_build_builtin_va_list (void)
2451 tree va_list_name;
2452 tree ap_field;
2454 if (!TARGET_AAPCS_BASED)
2455 return std_build_builtin_va_list ();
2457 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2458 defined as:
2460 struct __va_list
2462 void *__ap;
2465 The C Library ABI further reinforces this definition in \S
2466 4.1.
2468 We must follow this definition exactly. The structure tag
2469 name is visible in C++ mangled names, and thus forms a part
2470 of the ABI. The field name may be used by people who
2471 #include <stdarg.h>. */
2472 /* Create the type. */
2473 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2474 /* Give it the required name. */
2475 va_list_name = build_decl (BUILTINS_LOCATION,
2476 TYPE_DECL,
2477 get_identifier ("__va_list"),
2478 va_list_type);
2479 DECL_ARTIFICIAL (va_list_name) = 1;
2480 TYPE_NAME (va_list_type) = va_list_name;
2481 TYPE_STUB_DECL (va_list_type) = va_list_name;
2482 /* Create the __ap field. */
2483 ap_field = build_decl (BUILTINS_LOCATION,
2484 FIELD_DECL,
2485 get_identifier ("__ap"),
2486 ptr_type_node);
2487 DECL_ARTIFICIAL (ap_field) = 1;
2488 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2489 TYPE_FIELDS (va_list_type) = ap_field;
2490 /* Compute its layout. */
2491 layout_type (va_list_type);
2493 return va_list_type;
2496 /* Return an expression of type "void *" pointing to the next
2497 available argument in a variable-argument list. VALIST is the
2498 user-level va_list object, of type __builtin_va_list. */
2499 static tree
2500 arm_extract_valist_ptr (tree valist)
2502 if (TREE_TYPE (valist) == error_mark_node)
2503 return error_mark_node;
2505 /* On an AAPCS target, the pointer is stored within "struct
2506 va_list". */
2507 if (TARGET_AAPCS_BASED)
2509 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2510 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2511 valist, ap_field, NULL_TREE);
2514 return valist;
2517 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2518 static void
2519 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2521 valist = arm_extract_valist_ptr (valist);
2522 std_expand_builtin_va_start (valist, nextarg);
2525 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2526 static tree
2527 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2528 gimple_seq *post_p)
2530 valist = arm_extract_valist_ptr (valist);
2531 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2534 /* Fix up any incompatible options that the user has specified. */
2535 static void
2536 arm_option_override (void)
2538 if (global_options_set.x_arm_arch_option)
2539 arm_selected_arch = &all_architectures[arm_arch_option];
2541 if (global_options_set.x_arm_cpu_option)
2543 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2544 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2547 if (global_options_set.x_arm_tune_option)
2548 arm_selected_tune = &all_cores[(int) arm_tune_option];
2550 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2551 SUBTARGET_OVERRIDE_OPTIONS;
2552 #endif
2554 if (arm_selected_arch)
2556 if (arm_selected_cpu)
2558 /* Check for conflict between mcpu and march. */
2559 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2561 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2562 arm_selected_cpu->name, arm_selected_arch->name);
2563 /* -march wins for code generation.
2564 -mcpu wins for default tuning. */
2565 if (!arm_selected_tune)
2566 arm_selected_tune = arm_selected_cpu;
2568 arm_selected_cpu = arm_selected_arch;
2570 else
2571 /* -mcpu wins. */
2572 arm_selected_arch = NULL;
2574 else
2575 /* Pick a CPU based on the architecture. */
2576 arm_selected_cpu = arm_selected_arch;
2579 /* If the user did not specify a processor, choose one for them. */
2580 if (!arm_selected_cpu)
2582 const struct processors * sel;
2583 unsigned int sought;
2585 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2586 if (!arm_selected_cpu->name)
2588 #ifdef SUBTARGET_CPU_DEFAULT
2589 /* Use the subtarget default CPU if none was specified by
2590 configure. */
2591 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2592 #endif
2593 /* Default to ARM6. */
2594 if (!arm_selected_cpu->name)
2595 arm_selected_cpu = &all_cores[arm6];
2598 sel = arm_selected_cpu;
2599 insn_flags = sel->flags;
2601 /* Now check to see if the user has specified some command line
2602 switches that require certain abilities from the cpu. */
2603 sought = 0;
2605 if (TARGET_INTERWORK || TARGET_THUMB)
2607 sought |= (FL_THUMB | FL_MODE32);
2609 /* There are no ARM processors that support both APCS-26 and
2610 interworking. Therefore we force FL_MODE26 to be removed
2611 from insn_flags here (if it was set), so that the search
2612 below will always be able to find a compatible processor. */
2613 insn_flags &= ~FL_MODE26;
2616 if (sought != 0 && ((sought & insn_flags) != sought))
2618 /* Try to locate a CPU type that supports all of the abilities
2619 of the default CPU, plus the extra abilities requested by
2620 the user. */
2621 for (sel = all_cores; sel->name != NULL; sel++)
2622 if ((sel->flags & sought) == (sought | insn_flags))
2623 break;
2625 if (sel->name == NULL)
2627 unsigned current_bit_count = 0;
2628 const struct processors * best_fit = NULL;
2630 /* Ideally we would like to issue an error message here
2631 saying that it was not possible to find a CPU compatible
2632 with the default CPU, but which also supports the command
2633 line options specified by the programmer, and so they
2634 ought to use the -mcpu=<name> command line option to
2635 override the default CPU type.
2637 If we cannot find a cpu that has both the
2638 characteristics of the default cpu and the given
2639 command line options, we scan the array again looking
2640 for a best match. */
2641 for (sel = all_cores; sel->name != NULL; sel++)
2642 if ((sel->flags & sought) == sought)
2644 unsigned count;
2646 count = bit_count (sel->flags & insn_flags);
2648 if (count >= current_bit_count)
2650 best_fit = sel;
2651 current_bit_count = count;
2655 gcc_assert (best_fit);
2656 sel = best_fit;
2659 arm_selected_cpu = sel;
2663 gcc_assert (arm_selected_cpu);
2664 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2665 if (!arm_selected_tune)
2666 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2668 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2669 insn_flags = arm_selected_cpu->flags;
2670 arm_base_arch = arm_selected_cpu->base_arch;
2672 arm_tune = arm_selected_tune->core;
2673 tune_flags = arm_selected_tune->flags;
2674 current_tune = arm_selected_tune->tune;
2676 /* Make sure that the processor choice does not conflict with any of the
2677 other command line choices. */
2678 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2679 error ("target CPU does not support ARM mode");
2681 /* BPABI targets use linker tricks to allow interworking on cores
2682 without thumb support. */
2683 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2685 warning (0, "target CPU does not support interworking");
2686 target_flags &= ~MASK_INTERWORK;
2689 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2691 warning (0, "target CPU does not support THUMB instructions");
2692 target_flags &= ~MASK_THUMB;
2695 if (TARGET_APCS_FRAME && TARGET_THUMB)
2697 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2698 target_flags &= ~MASK_APCS_FRAME;
2701 /* Callee super interworking implies thumb interworking. Adding
2702 this to the flags here simplifies the logic elsewhere. */
2703 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2704 target_flags |= MASK_INTERWORK;
2706 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2707 from here where no function is being compiled currently. */
2708 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2709 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2711 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2712 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2714 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2716 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2717 target_flags |= MASK_APCS_FRAME;
2720 if (TARGET_POKE_FUNCTION_NAME)
2721 target_flags |= MASK_APCS_FRAME;
2723 if (TARGET_APCS_REENT && flag_pic)
2724 error ("-fpic and -mapcs-reent are incompatible");
2726 if (TARGET_APCS_REENT)
2727 warning (0, "APCS reentrant code not supported. Ignored");
2729 /* If this target is normally configured to use APCS frames, warn if they
2730 are turned off and debugging is turned on. */
2731 if (TARGET_ARM
2732 && write_symbols != NO_DEBUG
2733 && !TARGET_APCS_FRAME
2734 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2735 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2737 if (TARGET_APCS_FLOAT)
2738 warning (0, "passing floating point arguments in fp regs not yet supported");
2740 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2741 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2742 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2743 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2744 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2745 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2746 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2747 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2748 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2749 arm_arch6m = arm_arch6 && !arm_arch_notm;
2750 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2751 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2752 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2753 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2754 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2756 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2757 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2758 thumb_code = TARGET_ARM == 0;
2759 thumb1_code = TARGET_THUMB1 != 0;
2760 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2761 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2762 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2763 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2764 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2765 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2766 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2767 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2768 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2769 if (arm_restrict_it == 2)
2770 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2772 if (!TARGET_THUMB2)
2773 arm_restrict_it = 0;
2775 /* If we are not using the default (ARM mode) section anchor offset
2776 ranges, then set the correct ranges now. */
2777 if (TARGET_THUMB1)
2779 /* Thumb-1 LDR instructions cannot have negative offsets.
2780 Permissible positive offset ranges are 5-bit (for byte loads),
2781 6-bit (for halfword loads), or 7-bit (for word loads).
2782 Empirical results suggest a 7-bit anchor range gives the best
2783 overall code size. */
2784 targetm.min_anchor_offset = 0;
2785 targetm.max_anchor_offset = 127;
2787 else if (TARGET_THUMB2)
2789 /* The minimum is set such that the total size of the block
2790 for a particular anchor is 248 + 1 + 4095 bytes, which is
2791 divisible by eight, ensuring natural spacing of anchors. */
2792 targetm.min_anchor_offset = -248;
2793 targetm.max_anchor_offset = 4095;
2796 /* V5 code we generate is completely interworking capable, so we turn off
2797 TARGET_INTERWORK here to avoid many tests later on. */
2799 /* XXX However, we must pass the right pre-processor defines to CPP
2800 or GLD can get confused. This is a hack. */
2801 if (TARGET_INTERWORK)
2802 arm_cpp_interwork = 1;
2804 if (arm_arch5)
2805 target_flags &= ~MASK_INTERWORK;
2807 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2808 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2810 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2811 error ("iwmmxt abi requires an iwmmxt capable cpu");
2813 if (!global_options_set.x_arm_fpu_index)
2815 const char *target_fpu_name;
2816 bool ok;
2818 #ifdef FPUTYPE_DEFAULT
2819 target_fpu_name = FPUTYPE_DEFAULT;
2820 #else
2821 target_fpu_name = "vfp";
2822 #endif
2824 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2825 CL_TARGET);
2826 gcc_assert (ok);
2829 arm_fpu_desc = &all_fpus[arm_fpu_index];
2831 switch (arm_fpu_desc->model)
2833 case ARM_FP_MODEL_VFP:
2834 arm_fpu_attr = FPU_VFP;
2835 break;
2837 default:
2838 gcc_unreachable();
2841 if (TARGET_AAPCS_BASED)
2843 if (TARGET_CALLER_INTERWORKING)
2844 error ("AAPCS does not support -mcaller-super-interworking");
2845 else
2846 if (TARGET_CALLEE_INTERWORKING)
2847 error ("AAPCS does not support -mcallee-super-interworking");
2850 /* iWMMXt and NEON are incompatible. */
2851 if (TARGET_IWMMXT && TARGET_NEON)
2852 error ("iWMMXt and NEON are incompatible");
2854 /* iWMMXt unsupported under Thumb mode. */
2855 if (TARGET_THUMB && TARGET_IWMMXT)
2856 error ("iWMMXt unsupported under Thumb mode");
2858 /* __fp16 support currently assumes the core has ldrh. */
2859 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2860 sorry ("__fp16 and no ldrh");
2862 /* If soft-float is specified then don't use FPU. */
2863 if (TARGET_SOFT_FLOAT)
2864 arm_fpu_attr = FPU_NONE;
2866 if (TARGET_AAPCS_BASED)
2868 if (arm_abi == ARM_ABI_IWMMXT)
2869 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2870 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2871 && TARGET_HARD_FLOAT
2872 && TARGET_VFP)
2873 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2874 else
2875 arm_pcs_default = ARM_PCS_AAPCS;
2877 else
2879 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2880 sorry ("-mfloat-abi=hard and VFP");
2882 if (arm_abi == ARM_ABI_APCS)
2883 arm_pcs_default = ARM_PCS_APCS;
2884 else
2885 arm_pcs_default = ARM_PCS_ATPCS;
2888 /* For arm2/3 there is no need to do any scheduling if we are doing
2889 software floating-point. */
2890 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2891 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2893 /* Use the cp15 method if it is available. */
2894 if (target_thread_pointer == TP_AUTO)
2896 if (arm_arch6k && !TARGET_THUMB1)
2897 target_thread_pointer = TP_CP15;
2898 else
2899 target_thread_pointer = TP_SOFT;
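/* TP_CP15 reads the thread pointer directly from the CP15 TPIDRURO register
   (mrc p15, 0, rN, c13, c0, 3); TP_SOFT instead calls the __aeabi_read_tp
   helper routine.  */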
2902 if (TARGET_HARD_TP && TARGET_THUMB1)
2903 error ("can not use -mtp=cp15 with 16-bit Thumb");
2905 /* Override the default structure alignment for AAPCS ABI. */
2906 if (!global_options_set.x_arm_structure_size_boundary)
2908 if (TARGET_AAPCS_BASED)
2909 arm_structure_size_boundary = 8;
2911 else
2913 if (arm_structure_size_boundary != 8
2914 && arm_structure_size_boundary != 32
2915 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2917 if (ARM_DOUBLEWORD_ALIGN)
2918 warning (0,
2919 "structure size boundary can only be set to 8, 32 or 64");
2920 else
2921 warning (0, "structure size boundary can only be set to 8 or 32");
2922 arm_structure_size_boundary
2923 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2927 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2929 error ("RTP PIC is incompatible with Thumb");
2930 flag_pic = 0;
2933 /* If stack checking is disabled, we can use r10 as the PIC register,
2934 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2935 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2937 if (TARGET_VXWORKS_RTP)
2938 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2939 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2942 if (flag_pic && TARGET_VXWORKS_RTP)
2943 arm_pic_register = 9;
2945 if (arm_pic_register_string != NULL)
2947 int pic_register = decode_reg_name (arm_pic_register_string);
2949 if (!flag_pic)
2950 warning (0, "-mpic-register= is useless without -fpic");
2952 /* Prevent the user from choosing an obviously stupid PIC register. */
2953 else if (pic_register < 0 || call_used_regs[pic_register]
2954 || pic_register == HARD_FRAME_POINTER_REGNUM
2955 || pic_register == STACK_POINTER_REGNUM
2956 || pic_register >= PC_REGNUM
2957 || (TARGET_VXWORKS_RTP
2958 && (unsigned int) pic_register != arm_pic_register))
2959 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2960 else
2961 arm_pic_register = pic_register;
2964 if (TARGET_VXWORKS_RTP
2965 && !global_options_set.x_arm_pic_data_is_text_relative)
2966 arm_pic_data_is_text_relative = 0;
2968 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2969 if (fix_cm3_ldrd == 2)
2971 if (arm_selected_cpu->core == cortexm3)
2972 fix_cm3_ldrd = 1;
2973 else
2974 fix_cm3_ldrd = 0;
2977 /* Enable -munaligned-access by default for
2978 - all ARMv6 architecture-based processors
2979 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2980 - ARMv8 architecture-based processors.
2982 Disable -munaligned-access by default for
2983 - all pre-ARMv6 architecture-based processors
2984 - ARMv6-M architecture-based processors. */
2986 if (unaligned_access == 2)
2988 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2989 unaligned_access = 1;
2990 else
2991 unaligned_access = 0;
2993 else if (unaligned_access == 1
2994 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2996 warning (0, "target CPU does not support unaligned accesses");
2997 unaligned_access = 0;
3000 if (TARGET_THUMB1 && flag_schedule_insns)
3002 /* Don't warn since it's on by default in -O2. */
3003 flag_schedule_insns = 0;
3006 if (optimize_size)
3008 /* If optimizing for size, bump the number of instructions that we
3009 are prepared to conditionally execute (even on a StrongARM). */
3010 max_insns_skipped = 6;
3012 /* For THUMB2, we limit the conditional sequence to one IT block. */
3013 if (TARGET_THUMB2)
3014 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3016 else
3017 max_insns_skipped = current_tune->max_insns_skipped;
3019 /* Hot/Cold partitioning is not currently supported, since we can't
3020 handle literal pool placement in that case. */
3021 if (flag_reorder_blocks_and_partition)
3023 inform (input_location,
3024 "-freorder-blocks-and-partition not supported on this architecture");
3025 flag_reorder_blocks_and_partition = 0;
3026 flag_reorder_blocks = 1;
3029 if (flag_pic)
3030 /* Hoisting PIC address calculations more aggressively provides a small,
3031 but measurable, size reduction for PIC code. Therefore, we decrease
3032 the bar for unrestricted expression hoisting to the cost of PIC address
3033 calculation, which is 2 instructions. */
3034 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3035 global_options.x_param_values,
3036 global_options_set.x_param_values);
3038 /* ARM EABI defaults to strict volatile bitfields. */
3039 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3040 && abi_version_at_least(2))
3041 flag_strict_volatile_bitfields = 1;
3043 /* Enable software prefetching at -O3 for CPUs that have prefetch and where we
3044 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3045 if (flag_prefetch_loop_arrays < 0
3046 && HAVE_prefetch
3047 && optimize >= 3
3048 && current_tune->num_prefetch_slots > 0)
3049 flag_prefetch_loop_arrays = 1;
3051 /* Set up parameters to be used in the prefetching algorithm. Do not override the
3052 defaults unless we are tuning for a core we have researched values for. */
3053 if (current_tune->num_prefetch_slots > 0)
3054 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3055 current_tune->num_prefetch_slots,
3056 global_options.x_param_values,
3057 global_options_set.x_param_values);
3058 if (current_tune->l1_cache_line_size >= 0)
3059 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3060 current_tune->l1_cache_line_size,
3061 global_options.x_param_values,
3062 global_options_set.x_param_values);
3063 if (current_tune->l1_cache_size >= 0)
3064 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3065 current_tune->l1_cache_size,
3066 global_options.x_param_values,
3067 global_options_set.x_param_values);
3069 /* Use Neon to perform 64-bit operations rather than using core
3070 registers. */
3071 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3072 if (use_neon_for_64bits == 1)
3073 prefer_neon_for_64bits = true;
3075 /* Use the alternative scheduling-pressure algorithm by default. */
3076 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3077 global_options.x_param_values,
3078 global_options_set.x_param_values);
3080 /* Disable shrink-wrap when optimizing function for size, since it tends to
3081 generate additional returns. */
3082 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3083 flag_shrink_wrap = false;
3084 /* TBD: Dwarf info for apcs frame is not handled yet. */
3085 if (TARGET_APCS_FRAME)
3086 flag_shrink_wrap = false;
3088 /* We only support -mslow-flash-data on armv7-m targets. */
3089 if (target_slow_flash_data
3090 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3091 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3092 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3094 /* Currently, for slow flash data, we just disable literal pools. */
3095 if (target_slow_flash_data)
3096 arm_disable_literal_pool = true;
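/* With literal pools disabled, constants are typically materialised with
   MOVW/MOVT immediate sequences rather than PC-relative loads from flash,
   which is the point of -mslow-flash-data.  */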
3098 /* Thumb2 inline assembly code should always use unified syntax.
3099 This will apply to ARM and Thumb1 eventually. */
3100 if (TARGET_THUMB2)
3101 inline_asm_unified = 1;
3103 /* Disable scheduling fusion by default if it's not armv7 processor
3104 or doesn't prefer ldrd/strd. */
3105 if (flag_schedule_fusion == 2
3106 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3107 flag_schedule_fusion = 0;
3109 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3110 - epilogue_insns - does not accurately model the corresponding insns
3111 emitted in the asm file. In particular, see the comment in thumb_exit
3112 'Find out how many of the (return) argument registers we can corrupt'.
3113 As a consequence, the epilogue may clobber registers without fipa-ra
3114 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3115 TODO: Accurately model clobbers for epilogue_insns and reenable
3116 fipa-ra. */
3117 if (TARGET_THUMB1)
3118 flag_ipa_ra = 0;
3120 /* Register global variables with the garbage collector. */
3121 arm_add_gc_roots ();
3124 static void
3125 arm_add_gc_roots (void)
3127 gcc_obstack_init(&minipool_obstack);
3128 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3131 /* A table of known ARM exception types.
3132 For use with the interrupt function attribute. */
3134 typedef struct
3136 const char *const arg;
3137 const unsigned long return_value;
3139 isr_attribute_arg;
3141 static const isr_attribute_arg isr_attribute_args [] =
3143 { "IRQ", ARM_FT_ISR },
3144 { "irq", ARM_FT_ISR },
3145 { "FIQ", ARM_FT_FIQ },
3146 { "fiq", ARM_FT_FIQ },
3147 { "ABORT", ARM_FT_ISR },
3148 { "abort", ARM_FT_ISR },
3149 { "ABORT", ARM_FT_ISR },
3150 { "abort", ARM_FT_ISR },
3151 { "UNDEF", ARM_FT_EXCEPTION },
3152 { "undef", ARM_FT_EXCEPTION },
3153 { "SWI", ARM_FT_EXCEPTION },
3154 { "swi", ARM_FT_EXCEPTION },
3155 { NULL, ARM_FT_NORMAL }
3158 /* Returns the (interrupt) function type of the current
3159 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3161 static unsigned long
3162 arm_isr_value (tree argument)
3164 const isr_attribute_arg * ptr;
3165 const char * arg;
3167 if (!arm_arch_notm)
3168 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3170 /* No argument - default to IRQ. */
3171 if (argument == NULL_TREE)
3172 return ARM_FT_ISR;
3174 /* Get the value of the argument. */
3175 if (TREE_VALUE (argument) == NULL_TREE
3176 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3177 return ARM_FT_UNKNOWN;
3179 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3181 /* Check it against the list of known arguments. */
3182 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3183 if (streq (arg, ptr->arg))
3184 return ptr->return_value;
3186 /* An unrecognized interrupt type. */
3187 return ARM_FT_UNKNOWN;
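/* The argument strings checked above come from the "interrupt"/"isr"
   function attribute; for example, a handler declared as
   void handler (void) __attribute__ ((interrupt ("IRQ")));
   is classified as ARM_FT_ISR by the lookup above.  */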
3190 /* Computes the type of the current function. */
3192 static unsigned long
3193 arm_compute_func_type (void)
3195 unsigned long type = ARM_FT_UNKNOWN;
3196 tree a;
3197 tree attr;
3199 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3201 /* Decide if the current function is volatile. Such functions
3202 never return, and many memory cycles can be saved by not storing
3203 register values that will never be needed again. This optimization
3204 was added to speed up context switching in a kernel application. */
3205 if (optimize > 0
3206 && (TREE_NOTHROW (current_function_decl)
3207 || !(flag_unwind_tables
3208 || (flag_exceptions
3209 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3210 && TREE_THIS_VOLATILE (current_function_decl))
3211 type |= ARM_FT_VOLATILE;
3213 if (cfun->static_chain_decl != NULL)
3214 type |= ARM_FT_NESTED;
3216 attr = DECL_ATTRIBUTES (current_function_decl);
3218 a = lookup_attribute ("naked", attr);
3219 if (a != NULL_TREE)
3220 type |= ARM_FT_NAKED;
3222 a = lookup_attribute ("isr", attr);
3223 if (a == NULL_TREE)
3224 a = lookup_attribute ("interrupt", attr);
3226 if (a == NULL_TREE)
3227 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3228 else
3229 type |= arm_isr_value (TREE_VALUE (a));
3231 return type;
3234 /* Returns the type of the current function. */
3236 unsigned long
3237 arm_current_func_type (void)
3239 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3240 cfun->machine->func_type = arm_compute_func_type ();
3242 return cfun->machine->func_type;
3245 bool
3246 arm_allocate_stack_slots_for_args (void)
3248 /* Naked functions should not allocate stack slots for arguments. */
3249 return !IS_NAKED (arm_current_func_type ());
3252 static bool
3253 arm_warn_func_return (tree decl)
3255 /* Naked functions are implemented entirely in assembly, including the
3256 return sequence, so suppress warnings about this. */
3257 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3261 /* Output assembler code for a block containing the constant parts
3262 of a trampoline, leaving space for the variable parts.
3264 On the ARM (if r8 is the static chain regnum, and remembering that
3265 referencing pc adds an offset of 8), the trampoline looks like:
3266 ldr r8, [pc, #0]
3267 ldr pc, [pc]
3268 .word static chain value
3269 .word function's address
3270 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3272 static void
3273 arm_asm_trampoline_template (FILE *f)
3275 if (TARGET_ARM)
3277 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3278 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3280 else if (TARGET_THUMB2)
3282 /* The Thumb-2 trampoline is similar to the arm implementation.
3283 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3284 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3285 STATIC_CHAIN_REGNUM, PC_REGNUM);
3286 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3288 else
3290 ASM_OUTPUT_ALIGN (f, 2);
3291 fprintf (f, "\t.code\t16\n");
3292 fprintf (f, ".Ltrampoline_start:\n");
3293 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3294 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3295 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3296 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3297 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3298 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
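/* In this 16-bit Thumb sequence the target address is loaded into r0,
   stored over the saved r1 slot on the stack, and then popped straight
   into the PC, since Thumb-1 LDR cannot write to the PC directly.  */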
3300 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3301 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3304 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3306 static void
3307 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3309 rtx fnaddr, mem, a_tramp;
3311 emit_block_move (m_tramp, assemble_trampoline_template (),
3312 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3314 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3315 emit_move_insn (mem, chain_value);
3317 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3318 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3319 emit_move_insn (mem, fnaddr);
3321 a_tramp = XEXP (m_tramp, 0);
3322 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3323 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3324 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3327 /* Thumb trampolines should be entered in thumb mode, so set
3328 the bottom bit of the address. */
3330 static rtx
3331 arm_trampoline_adjust_address (rtx addr)
3333 if (TARGET_THUMB)
3334 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3335 NULL, 0, OPTAB_LIB_WIDEN);
3336 return addr;
3339 /* Return 1 if it is possible to return using a single instruction.
3340 If SIBLING is non-null, this is a test for a return before a sibling
3341 call. SIBLING is the call insn, so we can examine its register usage. */
3344 use_return_insn (int iscond, rtx sibling)
3346 int regno;
3347 unsigned int func_type;
3348 unsigned long saved_int_regs;
3349 unsigned HOST_WIDE_INT stack_adjust;
3350 arm_stack_offsets *offsets;
3352 /* Never use a return instruction before reload has run. */
3353 if (!reload_completed)
3354 return 0;
3356 func_type = arm_current_func_type ();
3358 /* Naked, volatile and stack alignment functions need special
3359 consideration. */
3360 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3361 return 0;
3363 /* So do interrupt functions that use the frame pointer and Thumb
3364 interrupt functions. */
3365 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3366 return 0;
3368 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3369 && !optimize_function_for_size_p (cfun))
3370 return 0;
3372 offsets = arm_get_frame_offsets ();
3373 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3375 /* As do variadic functions. */
3376 if (crtl->args.pretend_args_size
3377 || cfun->machine->uses_anonymous_args
3378 /* Or if the function calls __builtin_eh_return () */
3379 || crtl->calls_eh_return
3380 /* Or if the function calls alloca */
3381 || cfun->calls_alloca
3382 /* Or if there is a stack adjustment. However, if the stack pointer
3383 is saved on the stack, we can use a pre-incrementing stack load. */
3384 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3385 && stack_adjust == 4)))
3386 return 0;
3388 saved_int_regs = offsets->saved_regs_mask;
3390 /* Unfortunately, the insn
3392 ldmib sp, {..., sp, ...}
3394 triggers a bug on most SA-110 based devices, such that the stack
3395 pointer won't be correctly restored if the instruction takes a
3396 page fault. We work around this problem by popping r3 along with
3397 the other registers, since that is never slower than executing
3398 another instruction.
3400 We test for !arm_arch5 here, because code for any architecture
3401 less than this could potentially be run on one of the buggy
3402 chips. */
3403 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3405 /* Validate that r3 is a call-clobbered register (always true in
3406 the default abi) ... */
3407 if (!call_used_regs[3])
3408 return 0;
3410 /* ... that it isn't being used for a return value ... */
3411 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3412 return 0;
3414 /* ... or for a tail-call argument ... */
3415 if (sibling)
3417 gcc_assert (CALL_P (sibling));
3419 if (find_regno_fusage (sibling, USE, 3))
3420 return 0;
3423 /* ... and that there are no call-saved registers in r0-r2
3424 (always true in the default ABI). */
3425 if (saved_int_regs & 0x7)
3426 return 0;
3429 /* Can't be done if interworking with Thumb, and any registers have been
3430 stacked. */
3431 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3432 return 0;
3434 /* On StrongARM, conditional returns are expensive if they aren't
3435 taken and multiple registers have been stacked. */
3436 if (iscond && arm_tune_strongarm)
3438 /* Conditional return when just the LR is stored is a simple
3439 conditional-load instruction, that's not expensive. */
3440 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3441 return 0;
3443 if (flag_pic
3444 && arm_pic_register != INVALID_REGNUM
3445 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3446 return 0;
3449 /* If there are saved registers but the LR isn't saved, then we need
3450 two instructions for the return. */
3451 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3452 return 0;
3454 /* Can't be done if any of the VFP regs are pushed,
3455 since this also requires an insn. */
3456 if (TARGET_HARD_FLOAT && TARGET_VFP)
3457 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3458 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3459 return 0;
3461 if (TARGET_REALLY_IWMMXT)
3462 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3463 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3464 return 0;
3466 return 1;
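/* A rough illustration of the distinction made above: a function whose
   epilogue is just "ldmfd sp!, {r4, pc}" (r4 and LR saved, no other stack
   adjustment) can return in a single instruction, whereas one that saved
   r4 but not LR needs a separate restore plus "bx lr", so we return 0 for
   it.  */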
3469 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3470 shrink-wrapping if possible. This is the case if we need to emit a
3471 prologue, which we can test by looking at the offsets. */
3472 bool
3473 use_simple_return_p (void)
3475 arm_stack_offsets *offsets;
3477 offsets = arm_get_frame_offsets ();
3478 return offsets->outgoing_args != 0;
3481 /* Return TRUE if int I is a valid immediate ARM constant. */
3484 const_ok_for_arm (HOST_WIDE_INT i)
3486 int lowbit;
3488 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3489 be all zero, or all one. */
3490 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3491 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3492 != ((~(unsigned HOST_WIDE_INT) 0)
3493 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3494 return FALSE;
3496 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3498 /* Fast return for 0 and small values. We must do this for zero, since
3499 the code below can't handle that one case. */
3500 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3501 return TRUE;
3503 /* Get the number of trailing zeros. */
3504 lowbit = ffs((int) i) - 1;
3506 /* Only even shifts are allowed in ARM mode so round down to the
3507 nearest even number. */
3508 if (TARGET_ARM)
3509 lowbit &= ~1;
3511 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3512 return TRUE;
3514 if (TARGET_ARM)
3516 /* Allow rotated constants in ARM mode. */
3517 if (lowbit <= 4
3518 && ((i & ~0xc000003f) == 0
3519 || (i & ~0xf000000f) == 0
3520 || (i & ~0xfc000003) == 0))
3521 return TRUE;
3523 else
3525 HOST_WIDE_INT v;
3527 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3528 v = i & 0xff;
3529 v |= v << 16;
3530 if (i == v || i == (v | (v << 8)))
3531 return TRUE;
3533 /* Allow repeated pattern 0xXY00XY00. */
3534 v = i & 0xff00;
3535 v |= v << 16;
3536 if (i == v)
3537 return TRUE;
3540 return FALSE;
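/* Some illustrative cases (a sketch, not exhaustive): 0xff, 0xff00 and
   0xf000000f are valid ARM immediates (an 8-bit value rotated by an even
   amount), while 0x1fe is not, since it would need an odd rotation.  When
   compiling for Thumb-2, 0x1fe is accepted (shifts may be arbitrary), as
   are the replicated patterns 0x00ff00ff, 0x12001200 and 0xabababab.  */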
3543 /* Return true if I is a valid constant for the operation CODE. */
3545 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3547 if (const_ok_for_arm (i))
3548 return 1;
3550 switch (code)
3552 case SET:
3553 /* See if we can use movw. */
3554 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3555 return 1;
3556 else
3557 /* Otherwise, try mvn. */
3558 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3560 case PLUS:
3561 /* See if we can use addw or subw. */
3562 if (TARGET_THUMB2
3563 && ((i & 0xfffff000) == 0
3564 || ((-i) & 0xfffff000) == 0))
3565 return 1;
3566 /* else fall through. */
3568 case COMPARE:
3569 case EQ:
3570 case NE:
3571 case GT:
3572 case LE:
3573 case LT:
3574 case GE:
3575 case GEU:
3576 case LTU:
3577 case GTU:
3578 case LEU:
3579 case UNORDERED:
3580 case ORDERED:
3581 case UNEQ:
3582 case UNGE:
3583 case UNLT:
3584 case UNGT:
3585 case UNLE:
3586 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3588 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3589 case XOR:
3590 return 0;
3592 case IOR:
3593 if (TARGET_THUMB2)
3594 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3595 return 0;
3597 case AND:
3598 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3600 default:
3601 gcc_unreachable ();
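/* For example (illustrative only): a SET of 0xffffff00 is accepted because
   its complement 0xff can be loaded with MVN; an AND with 0xffffff00
   likewise becomes a BIC of 0xff; and a PLUS of -0xff is accepted since
   the negated value can be used by SUB (or SUBW on Thumb-2).  */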
3605 /* Return true if I is a valid di mode constant for the operation CODE. */
3607 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3609 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3610 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3611 rtx hi = GEN_INT (hi_val);
3612 rtx lo = GEN_INT (lo_val);
3614 if (TARGET_THUMB1)
3615 return 0;
3617 switch (code)
3619 case AND:
3620 case IOR:
3621 case XOR:
3622 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3623 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3624 case PLUS:
3625 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3627 default:
3628 return 0;
3632 /* Emit a sequence of insns to handle a large constant.
3633 CODE is the code of the operation required, it can be any of SET, PLUS,
3634 IOR, AND, XOR, MINUS;
3635 MODE is the mode in which the operation is being performed;
3636 VAL is the integer to operate on;
3637 SOURCE is the other operand (a register, or a null-pointer for SET);
3638 SUBTARGETS means it is safe to create scratch registers if that will
3639 either produce a simpler sequence, or we will want to cse the values.
3640 Return value is the number of insns emitted. */
3642 /* ??? Tweak this for thumb2. */
3644 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3645 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3647 rtx cond;
3649 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3650 cond = COND_EXEC_TEST (PATTERN (insn));
3651 else
3652 cond = NULL_RTX;
3654 if (subtargets || code == SET
3655 || (REG_P (target) && REG_P (source)
3656 && REGNO (target) != REGNO (source)))
3658 /* After arm_reorg has been called, we can't fix up expensive
3659 constants by pushing them into memory so we must synthesize
3660 them in-line, regardless of the cost. This is only likely to
3661 be more costly on chips that have load delay slots and we are
3662 compiling without running the scheduler (so no splitting
3663 occurred before the final instruction emission).
3665 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3667 if (!cfun->machine->after_arm_reorg
3668 && !cond
3669 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3670 1, 0)
3671 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3672 + (code != SET))))
3674 if (code == SET)
3676 /* Currently SET is the only monadic value for CODE, all
3677 the rest are dyadic. */
3678 if (TARGET_USE_MOVT)
3679 arm_emit_movpair (target, GEN_INT (val));
3680 else
3681 emit_set_insn (target, GEN_INT (val));
3683 return 1;
3685 else
3687 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3689 if (TARGET_USE_MOVT)
3690 arm_emit_movpair (temp, GEN_INT (val));
3691 else
3692 emit_set_insn (temp, GEN_INT (val));
3694 /* For MINUS, the value is subtracted from, since we never
3695 have subtraction of a constant. */
3696 if (code == MINUS)
3697 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3698 else
3699 emit_set_insn (target,
3700 gen_rtx_fmt_ee (code, mode, source, temp));
3701 return 2;
3706 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3710 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3711 ARM/THUMB2 immediates and add up to VAL.
3712 The function return value gives the number of insns required. */
3713 static int
3714 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3715 struct four_ints *return_sequence)
3717 int best_consecutive_zeros = 0;
3718 int i;
3719 int best_start = 0;
3720 int insns1, insns2;
3721 struct four_ints tmp_sequence;
3723 /* If we aren't targeting ARM, the best place to start is always at
3724 the bottom, otherwise look more closely. */
3725 if (TARGET_ARM)
3727 for (i = 0; i < 32; i += 2)
3729 int consecutive_zeros = 0;
3731 if (!(val & (3 << i)))
3733 while ((i < 32) && !(val & (3 << i)))
3735 consecutive_zeros += 2;
3736 i += 2;
3738 if (consecutive_zeros > best_consecutive_zeros)
3740 best_consecutive_zeros = consecutive_zeros;
3741 best_start = i - consecutive_zeros;
3743 i -= 2;
3748 /* So long as it won't require any more insns to do so, it's
3749 desirable to emit a small constant (in bits 0...9) in the last
3750 insn. This way there is more chance that it can be combined with
3751 a later addressing insn to form a pre-indexed load or store
3752 operation. Consider:
3754 *((volatile int *)0xe0000100) = 1;
3755 *((volatile int *)0xe0000110) = 2;
3757 We want this to wind up as:
3759 mov rA, #0xe0000000
3760 mov rB, #1
3761 str rB, [rA, #0x100]
3762 mov rB, #2
3763 str rB, [rA, #0x110]
3765 rather than having to synthesize both large constants from scratch.
3767 Therefore, we calculate how many insns would be required to emit
3768 the constant starting from `best_start', and also starting from
3769 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3770 yield a shorter sequence, we may as well use zero. */
3771 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3772 if (best_start != 0
3773 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3775 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3776 if (insns2 <= insns1)
3778 *return_sequence = tmp_sequence;
3779 insns1 = insns2;
3783 return insns1;
3786 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3787 static int
3788 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3789 struct four_ints *return_sequence, int i)
3791 int remainder = val & 0xffffffff;
3792 int insns = 0;
3794 /* Try and find a way of doing the job in either two or three
3795 instructions.
3797 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3798 location. We start at position I. This may be the MSB, or
3799 optimal_immediate_sequence may have positioned it at the largest block
3800 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3801 wrapping around to the top of the word when we drop off the bottom.
3802 In the worst case this code should produce no more than four insns.
3804 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3805 constants, shifted to any arbitrary location. We should always start
3806 at the MSB. */
3809 int end;
3810 unsigned int b1, b2, b3, b4;
3811 unsigned HOST_WIDE_INT result;
3812 int loc;
3814 gcc_assert (insns < 4);
3816 if (i <= 0)
3817 i += 32;
3819 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3820 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3822 loc = i;
3823 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3824 /* We can use addw/subw for the last 12 bits. */
3825 result = remainder;
3826 else
3828 /* Use an 8-bit shifted/rotated immediate. */
3829 end = i - 8;
3830 if (end < 0)
3831 end += 32;
3832 result = remainder & ((0x0ff << end)
3833 | ((i < end) ? (0xff >> (32 - end))
3834 : 0));
3835 i -= 8;
3838 else
3840 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3841 arbitrary shifts. */
3842 i -= TARGET_ARM ? 2 : 1;
3843 continue;
3846 /* Next, see if we can do a better job with a thumb2 replicated
3847 constant.
3849 We do it this way around to catch the cases like 0x01F001E0 where
3850 two 8-bit immediates would work, but a replicated constant would
3851 make it worse.
3853 TODO: 16-bit constants that don't clear all the bits, but still win.
3854 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3855 if (TARGET_THUMB2)
3857 b1 = (remainder & 0xff000000) >> 24;
3858 b2 = (remainder & 0x00ff0000) >> 16;
3859 b3 = (remainder & 0x0000ff00) >> 8;
3860 b4 = remainder & 0xff;
3862 if (loc > 24)
3864 /* The 8-bit immediate already found clears b1 (and maybe b2),
3865 but must leave b3 and b4 alone. */
3867 /* First try to find a 32-bit replicated constant that clears
3868 almost everything. We can assume that we can't do it in one,
3869 or else we wouldn't be here. */
3870 unsigned int tmp = b1 & b2 & b3 & b4;
3871 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3872 + (tmp << 24);
3873 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3874 + (tmp == b3) + (tmp == b4);
3875 if (tmp
3876 && (matching_bytes >= 3
3877 || (matching_bytes == 2
3878 && const_ok_for_op (remainder & ~tmp2, code))))
3880 /* At least 3 of the bytes match, and the fourth has at
3881 least as many bits set, or two of the bytes match
3882 and it will only require one more insn to finish. */
3883 result = tmp2;
3884 i = tmp != b1 ? 32
3885 : tmp != b2 ? 24
3886 : tmp != b3 ? 16
3887 : 8;
3890 /* Second, try to find a 16-bit replicated constant that can
3891 leave three of the bytes clear. If b2 or b4 is already
3892 zero, then we can. If the 8-bit from above would not
3893 clear b2 anyway, then we still win. */
3894 else if (b1 == b3 && (!b2 || !b4
3895 || (remainder & 0x00ff0000 & ~result)))
3897 result = remainder & 0xff00ff00;
3898 i = 24;
3901 else if (loc > 16)
3903 /* The 8-bit immediate already found clears b2 (and maybe b3)
3904 and we don't get here unless b1 is already clear, but it will
3905 leave b4 unchanged. */
3907 /* If we can clear b2 and b4 at once, then we win, since the
3908 8-bits couldn't possibly reach that far. */
3909 if (b2 == b4)
3911 result = remainder & 0x00ff00ff;
3912 i = 16;
3917 return_sequence->i[insns++] = result;
3918 remainder &= ~result;
3920 if (code == SET || code == MINUS)
3921 code = PLUS;
3923 while (remainder);
3925 return insns;
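/* Worked example (illustrative): for VAL == 0xfff and CODE == SET in ARM
   mode, no single 8-bit rotated immediate covers the value, so the loop
   above splits it into the two valid immediates 0xff0 and 0xf, roughly a
   MOV of 0xff0 followed by an ADD/ORR of the low nibble, and the function
   returns 2.  */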
3928 /* Emit an instruction with the indicated PATTERN. If COND is
3929 non-NULL, conditionalize the execution of the instruction on COND
3930 being true. */
3932 static void
3933 emit_constant_insn (rtx cond, rtx pattern)
3935 if (cond)
3936 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3937 emit_insn (pattern);
3940 /* As above, but extra parameter GENERATE which, if clear, suppresses
3941 RTL generation. */
3943 static int
3944 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3945 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3946 int generate)
3948 int can_invert = 0;
3949 int can_negate = 0;
3950 int final_invert = 0;
3951 int i;
3952 int set_sign_bit_copies = 0;
3953 int clear_sign_bit_copies = 0;
3954 int clear_zero_bit_copies = 0;
3955 int set_zero_bit_copies = 0;
3956 int insns = 0, neg_insns, inv_insns;
3957 unsigned HOST_WIDE_INT temp1, temp2;
3958 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3959 struct four_ints *immediates;
3960 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3962 /* Find out which operations are safe for a given CODE. Also do a quick
3963 check for degenerate cases; these can occur when DImode operations
3964 are split. */
3965 switch (code)
3967 case SET:
3968 can_invert = 1;
3969 break;
3971 case PLUS:
3972 can_negate = 1;
3973 break;
3975 case IOR:
3976 if (remainder == 0xffffffff)
3978 if (generate)
3979 emit_constant_insn (cond,
3980 gen_rtx_SET (VOIDmode, target,
3981 GEN_INT (ARM_SIGN_EXTEND (val))));
3982 return 1;
3985 if (remainder == 0)
3987 if (reload_completed && rtx_equal_p (target, source))
3988 return 0;
3990 if (generate)
3991 emit_constant_insn (cond,
3992 gen_rtx_SET (VOIDmode, target, source));
3993 return 1;
3995 break;
3997 case AND:
3998 if (remainder == 0)
4000 if (generate)
4001 emit_constant_insn (cond,
4002 gen_rtx_SET (VOIDmode, target, const0_rtx));
4003 return 1;
4005 if (remainder == 0xffffffff)
4007 if (reload_completed && rtx_equal_p (target, source))
4008 return 0;
4009 if (generate)
4010 emit_constant_insn (cond,
4011 gen_rtx_SET (VOIDmode, target, source));
4012 return 1;
4014 can_invert = 1;
4015 break;
4017 case XOR:
4018 if (remainder == 0)
4020 if (reload_completed && rtx_equal_p (target, source))
4021 return 0;
4022 if (generate)
4023 emit_constant_insn (cond,
4024 gen_rtx_SET (VOIDmode, target, source));
4025 return 1;
4028 if (remainder == 0xffffffff)
4030 if (generate)
4031 emit_constant_insn (cond,
4032 gen_rtx_SET (VOIDmode, target,
4033 gen_rtx_NOT (mode, source)));
4034 return 1;
4036 final_invert = 1;
4037 break;
4039 case MINUS:
4040 /* We treat MINUS as (val - source), since (source - val) is always
4041 passed as (source + (-val)). */
4042 if (remainder == 0)
4044 if (generate)
4045 emit_constant_insn (cond,
4046 gen_rtx_SET (VOIDmode, target,
4047 gen_rtx_NEG (mode, source)));
4048 return 1;
4050 if (const_ok_for_arm (val))
4052 if (generate)
4053 emit_constant_insn (cond,
4054 gen_rtx_SET (VOIDmode, target,
4055 gen_rtx_MINUS (mode, GEN_INT (val),
4056 source)));
4057 return 1;
4060 break;
4062 default:
4063 gcc_unreachable ();
4066 /* If we can do it in one insn get out quickly. */
4067 if (const_ok_for_op (val, code))
4069 if (generate)
4070 emit_constant_insn (cond,
4071 gen_rtx_SET (VOIDmode, target,
4072 (source
4073 ? gen_rtx_fmt_ee (code, mode, source,
4074 GEN_INT (val))
4075 : GEN_INT (val))));
4076 return 1;
4079 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4080 insn. */
4081 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4082 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4084 if (generate)
4086 if (mode == SImode && i == 16)
4087 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4088 smaller insn. */
4089 emit_constant_insn (cond,
4090 gen_zero_extendhisi2
4091 (target, gen_lowpart (HImode, source)));
4092 else
4093 /* Extz only supports SImode, but we can coerce the operands
4094 into that mode. */
4095 emit_constant_insn (cond,
4096 gen_extzv_t2 (gen_lowpart (SImode, target),
4097 gen_lowpart (SImode, source),
4098 GEN_INT (i), const0_rtx));
4101 return 1;
4104 /* Calculate a few attributes that may be useful for specific
4105 optimizations. */
4106 /* Count number of leading zeros. */
4107 for (i = 31; i >= 0; i--)
4109 if ((remainder & (1 << i)) == 0)
4110 clear_sign_bit_copies++;
4111 else
4112 break;
4115 /* Count number of leading 1's. */
4116 for (i = 31; i >= 0; i--)
4118 if ((remainder & (1 << i)) != 0)
4119 set_sign_bit_copies++;
4120 else
4121 break;
4124 /* Count number of trailing zero's. */
4125 for (i = 0; i <= 31; i++)
4127 if ((remainder & (1 << i)) == 0)
4128 clear_zero_bit_copies++;
4129 else
4130 break;
4133 /* Count number of trailing 1's. */
4134 for (i = 0; i <= 31; i++)
4136 if ((remainder & (1 << i)) != 0)
4137 set_zero_bit_copies++;
4138 else
4139 break;
4142 switch (code)
4144 case SET:
4145 /* See if we can do this by sign_extending a constant that is known
4146 to be negative. This is a good way of doing it, since the shift
4147 may well merge into a subsequent insn. */
4148 if (set_sign_bit_copies > 1)
4150 if (const_ok_for_arm
4151 (temp1 = ARM_SIGN_EXTEND (remainder
4152 << (set_sign_bit_copies - 1))))
4154 if (generate)
4156 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4157 emit_constant_insn (cond,
4158 gen_rtx_SET (VOIDmode, new_src,
4159 GEN_INT (temp1)));
4160 emit_constant_insn (cond,
4161 gen_ashrsi3 (target, new_src,
4162 GEN_INT (set_sign_bit_copies - 1)));
4164 return 2;
4166 /* For an inverted constant, we will need to set the low bits,
4167 these will be shifted out of harm's way. */
4168 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4169 if (const_ok_for_arm (~temp1))
4171 if (generate)
4173 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4174 emit_constant_insn (cond,
4175 gen_rtx_SET (VOIDmode, new_src,
4176 GEN_INT (temp1)));
4177 emit_constant_insn (cond,
4178 gen_ashrsi3 (target, new_src,
4179 GEN_INT (set_sign_bit_copies - 1)));
4181 return 2;
4185 /* See if we can calculate the value as the difference between two
4186 valid immediates. */
4187 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4189 int topshift = clear_sign_bit_copies & ~1;
4191 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4192 & (0xff000000 >> topshift));
4194 /* If temp1 is zero, then that means the 9 most significant
4195 bits of remainder were 1 and we've caused it to overflow.
4196 When topshift is 0 we don't need to do anything since we
4197 can borrow from 'bit 32'. */
4198 if (temp1 == 0 && topshift != 0)
4199 temp1 = 0x80000000 >> (topshift - 1);
4201 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4203 if (const_ok_for_arm (temp2))
4205 if (generate)
4207 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4208 emit_constant_insn (cond,
4209 gen_rtx_SET (VOIDmode, new_src,
4210 GEN_INT (temp1)));
4211 emit_constant_insn (cond,
4212 gen_addsi3 (target, new_src,
4213 GEN_INT (-temp2)));
4216 return 2;
4220 /* See if we can generate this by setting the bottom (or the top)
4221 16 bits, and then shifting these into the other half of the
4222 word. We only look for the simplest cases, to do more would cost
4223 too much. Be careful, however, not to generate this when the
4224 alternative would take fewer insns. */
4225 if (val & 0xffff0000)
4227 temp1 = remainder & 0xffff0000;
4228 temp2 = remainder & 0x0000ffff;
4230 /* Overlaps outside this range are best done using other methods. */
4231 for (i = 9; i < 24; i++)
4233 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4234 && !const_ok_for_arm (temp2))
4236 rtx new_src = (subtargets
4237 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4238 : target);
4239 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4240 source, subtargets, generate);
4241 source = new_src;
4242 if (generate)
4243 emit_constant_insn
4244 (cond,
4245 gen_rtx_SET
4246 (VOIDmode, target,
4247 gen_rtx_IOR (mode,
4248 gen_rtx_ASHIFT (mode, source,
4249 GEN_INT (i)),
4250 source)));
4251 return insns + 1;
4255 /* Don't duplicate cases already considered. */
4256 for (i = 17; i < 24; i++)
4258 if (((temp1 | (temp1 >> i)) == remainder)
4259 && !const_ok_for_arm (temp1))
4261 rtx new_src = (subtargets
4262 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4263 : target);
4264 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4265 source, subtargets, generate);
4266 source = new_src;
4267 if (generate)
4268 emit_constant_insn
4269 (cond,
4270 gen_rtx_SET (VOIDmode, target,
4271 gen_rtx_IOR
4272 (mode,
4273 gen_rtx_LSHIFTRT (mode, source,
4274 GEN_INT (i)),
4275 source)));
4276 return insns + 1;
4280 break;
4282 case IOR:
4283 case XOR:
4284 /* If we have IOR or XOR, and the constant can be loaded in a
4285 single instruction, and we can find a temporary to put it in,
4286 then this can be done in two instructions instead of 3-4. */
4287 if (subtargets
4288 /* TARGET can't be NULL if SUBTARGETS is 0 */
4289 || (reload_completed && !reg_mentioned_p (target, source)))
4291 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4293 if (generate)
4295 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4297 emit_constant_insn (cond,
4298 gen_rtx_SET (VOIDmode, sub,
4299 GEN_INT (val)));
4300 emit_constant_insn (cond,
4301 gen_rtx_SET (VOIDmode, target,
4302 gen_rtx_fmt_ee (code, mode,
4303 source, sub)));
4305 return 2;
4309 if (code == XOR)
4310 break;
4312 /* Convert
4313 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4314 and 0s for the remainder, e.g. 0xfff00000) into
4315 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4317 This can be done in 2 instructions by using shifts with mov or mvn.
4318 E.g. for
4319 x = x | 0xfff00000;
4320 we generate:
4321 mvn r0, r0, asl #12
4322 mvn r0, r0, lsr #12 */
4323 if (set_sign_bit_copies > 8
4324 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4326 if (generate)
4328 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4329 rtx shift = GEN_INT (set_sign_bit_copies);
4331 emit_constant_insn
4332 (cond,
4333 gen_rtx_SET (VOIDmode, sub,
4334 gen_rtx_NOT (mode,
4335 gen_rtx_ASHIFT (mode,
4336 source,
4337 shift))));
4338 emit_constant_insn
4339 (cond,
4340 gen_rtx_SET (VOIDmode, target,
4341 gen_rtx_NOT (mode,
4342 gen_rtx_LSHIFTRT (mode, sub,
4343 shift))));
4345 return 2;
4348 /* Convert
4349 x = y | constant (which has set_zero_bit_copies trailing ones) into
4351 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4353 E.g. for r0 = r0 | 0xfff we generate:
4354 mvn r0, r0, lsr #12
4355 mvn r0, r0, asl #12
4358 if (set_zero_bit_copies > 8
4359 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4361 if (generate)
4363 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4364 rtx shift = GEN_INT (set_zero_bit_copies);
4366 emit_constant_insn
4367 (cond,
4368 gen_rtx_SET (VOIDmode, sub,
4369 gen_rtx_NOT (mode,
4370 gen_rtx_LSHIFTRT (mode,
4371 source,
4372 shift))));
4373 emit_constant_insn
4374 (cond,
4375 gen_rtx_SET (VOIDmode, target,
4376 gen_rtx_NOT (mode,
4377 gen_rtx_ASHIFT (mode, sub,
4378 shift))));
4380 return 2;
4383 /* This will never be reached for Thumb2 because orn is a valid
4384 instruction. This is for Thumb1 and the ARM 32 bit cases.
4386 x = y | constant (such that ~constant is a valid constant)
4387 Transform this to
4388 x = ~(~y & ~constant).
4390 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4392 if (generate)
4394 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4395 emit_constant_insn (cond,
4396 gen_rtx_SET (VOIDmode, sub,
4397 gen_rtx_NOT (mode, source)));
4398 source = sub;
4399 if (subtargets)
4400 sub = gen_reg_rtx (mode);
4401 emit_constant_insn (cond,
4402 gen_rtx_SET (VOIDmode, sub,
4403 gen_rtx_AND (mode, source,
4404 GEN_INT (temp1))));
4405 emit_constant_insn (cond,
4406 gen_rtx_SET (VOIDmode, target,
4407 gen_rtx_NOT (mode, sub)));
4409 return 3;
4411 break;
4413 case AND:
4414 /* See if two shifts will do 2 or more insn's worth of work. */
4415 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4417 HOST_WIDE_INT shift_mask = ((0xffffffff
4418 << (32 - clear_sign_bit_copies))
4419 & 0xffffffff);
4421 if ((remainder | shift_mask) != 0xffffffff)
4423 if (generate)
4425 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4426 insns = arm_gen_constant (AND, mode, cond,
4427 remainder | shift_mask,
4428 new_src, source, subtargets, 1);
4429 source = new_src;
4431 else
4433 rtx targ = subtargets ? NULL_RTX : target;
4434 insns = arm_gen_constant (AND, mode, cond,
4435 remainder | shift_mask,
4436 targ, source, subtargets, 0);
4440 if (generate)
4442 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4443 rtx shift = GEN_INT (clear_sign_bit_copies);
4445 emit_insn (gen_ashlsi3 (new_src, source, shift));
4446 emit_insn (gen_lshrsi3 (target, new_src, shift));
4449 return insns + 2;
4452 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4454 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4456 if ((remainder | shift_mask) != 0xffffffff)
4458 if (generate)
4460 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4462 insns = arm_gen_constant (AND, mode, cond,
4463 remainder | shift_mask,
4464 new_src, source, subtargets, 1);
4465 source = new_src;
4467 else
4469 rtx targ = subtargets ? NULL_RTX : target;
4471 insns = arm_gen_constant (AND, mode, cond,
4472 remainder | shift_mask,
4473 targ, source, subtargets, 0);
4477 if (generate)
4479 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4480 rtx shift = GEN_INT (clear_zero_bit_copies);
4482 emit_insn (gen_lshrsi3 (new_src, source, shift));
4483 emit_insn (gen_ashlsi3 (target, new_src, shift));
4486 return insns + 2;
4489 break;
4491 default:
4492 break;
4495 /* Calculate what the instruction sequences would be if we generated it
4496 normally, negated, or inverted. */
4497 if (code == AND)
4498 /* AND cannot be split into multiple insns, so invert and use BIC. */
4499 insns = 99;
4500 else
4501 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4503 if (can_negate)
4504 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4505 &neg_immediates);
4506 else
4507 neg_insns = 99;
4509 if (can_invert || final_invert)
4510 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4511 &inv_immediates);
4512 else
4513 inv_insns = 99;
4515 immediates = &pos_immediates;
4517 /* Is the negated immediate sequence more efficient? */
4518 if (neg_insns < insns && neg_insns <= inv_insns)
4520 insns = neg_insns;
4521 immediates = &neg_immediates;
4523 else
4524 can_negate = 0;
4526 /* Is the inverted immediate sequence more efficient?
4527 We must allow for an extra NOT instruction for XOR operations, although
4528 there is some chance that the final 'mvn' will get optimized later. */
4529 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4531 insns = inv_insns;
4532 immediates = &inv_immediates;
4534 else
4536 can_invert = 0;
4537 final_invert = 0;
4540 /* Now output the chosen sequence as instructions. */
4541 if (generate)
4543 for (i = 0; i < insns; i++)
4545 rtx new_src, temp1_rtx;
4547 temp1 = immediates->i[i];
4549 if (code == SET || code == MINUS)
4550 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4551 else if ((final_invert || i < (insns - 1)) && subtargets)
4552 new_src = gen_reg_rtx (mode);
4553 else
4554 new_src = target;
4556 if (can_invert)
4557 temp1 = ~temp1;
4558 else if (can_negate)
4559 temp1 = -temp1;
4561 temp1 = trunc_int_for_mode (temp1, mode);
4562 temp1_rtx = GEN_INT (temp1);
4564 if (code == SET)
4566 else if (code == MINUS)
4567 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4568 else
4569 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4571 emit_constant_insn (cond,
4572 gen_rtx_SET (VOIDmode, new_src,
4573 temp1_rtx));
4574 source = new_src;
4576 if (code == SET)
4578 can_negate = can_invert;
4579 can_invert = 0;
4580 code = PLUS;
4582 else if (code == MINUS)
4583 code = PLUS;
4587 if (final_invert)
4589 if (generate)
4590 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4591 gen_rtx_NOT (mode, source)));
4592 insns++;
4595 return insns;
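/* Worked example (a sketch): for CODE == SET and VAL == 0xffffff00, the
   positive sequence needs three 8-bit immediates (0xff000000, 0xff0000,
   0xff00), but the inverted value is just 0xff, so the inverted sequence
   is chosen and the result is a single "mvn rD, #0xff" (insns == 1).  */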
4598 /* Canonicalize a comparison so that we are more likely to recognize it.
4599 This can be done for a few constant compares, where we can make the
4600 immediate value easier to load. */
4602 static void
4603 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4604 bool op0_preserve_value)
4606 machine_mode mode;
4607 unsigned HOST_WIDE_INT i, maxval;
4609 mode = GET_MODE (*op0);
4610 if (mode == VOIDmode)
4611 mode = GET_MODE (*op1);
4613 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4615 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4616 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4617 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4618 for GTU/LEU in Thumb mode. */
4619 if (mode == DImode)
4622 if (*code == GT || *code == LE
4623 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4625 /* Missing comparison. First try to use an available
4626 comparison. */
4627 if (CONST_INT_P (*op1))
4629 i = INTVAL (*op1);
4630 switch (*code)
4632 case GT:
4633 case LE:
4634 if (i != maxval
4635 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4637 *op1 = GEN_INT (i + 1);
4638 *code = *code == GT ? GE : LT;
4639 return;
4641 break;
4642 case GTU:
4643 case LEU:
4644 if (i != ~((unsigned HOST_WIDE_INT) 0)
4645 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4647 *op1 = GEN_INT (i + 1);
4648 *code = *code == GTU ? GEU : LTU;
4649 return;
4651 break;
4652 default:
4653 gcc_unreachable ();
4657 /* If that did not work, reverse the condition. */
4658 if (!op0_preserve_value)
4660 std::swap (*op0, *op1);
4661 *code = (int)swap_condition ((enum rtx_code)*code);
4664 return;
4667 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4668 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4669 to facilitate possible combining with a cmp into 'ands'. */
4670 if (mode == SImode
4671 && GET_CODE (*op0) == ZERO_EXTEND
4672 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4673 && GET_MODE (XEXP (*op0, 0)) == QImode
4674 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4675 && subreg_lowpart_p (XEXP (*op0, 0))
4676 && *op1 == const0_rtx)
4677 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4678 GEN_INT (255));
4680 /* Comparisons smaller than DImode. Only adjust comparisons against
4681 an out-of-range constant. */
4682 if (!CONST_INT_P (*op1)
4683 || const_ok_for_arm (INTVAL (*op1))
4684 || const_ok_for_arm (- INTVAL (*op1)))
4685 return;
4687 i = INTVAL (*op1);
4689 switch (*code)
4691 case EQ:
4692 case NE:
4693 return;
4695 case GT:
4696 case LE:
4697 if (i != maxval
4698 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4700 *op1 = GEN_INT (i + 1);
4701 *code = *code == GT ? GE : LT;
4702 return;
4704 break;
4706 case GE:
4707 case LT:
4708 if (i != ~maxval
4709 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4711 *op1 = GEN_INT (i - 1);
4712 *code = *code == GE ? GT : LE;
4713 return;
4715 break;
4717 case GTU:
4718 case LEU:
4719 if (i != ~((unsigned HOST_WIDE_INT) 0)
4720 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4722 *op1 = GEN_INT (i + 1);
4723 *code = *code == GTU ? GEU : LTU;
4724 return;
4726 break;
4728 case GEU:
4729 case LTU:
4730 if (i != 0
4731 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4733 *op1 = GEN_INT (i - 1);
4734 *code = *code == GEU ? GTU : LEU;
4735 return;
4737 break;
4739 default:
4740 gcc_unreachable ();
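/* For example (illustrative): "x > 0xfff" in SImode cannot use 0xfff
   directly (neither 0xfff nor -0xfff is a valid immediate), so the GT case
   above rewrites it as "x >= 0x1000"; 0x1000 is a simple shifted 8-bit
   immediate and the comparison result is unchanged for integers.  */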
4745 /* Define how to find the value returned by a function. */
4747 static rtx
4748 arm_function_value(const_tree type, const_tree func,
4749 bool outgoing ATTRIBUTE_UNUSED)
4751 machine_mode mode;
4752 int unsignedp ATTRIBUTE_UNUSED;
4753 rtx r ATTRIBUTE_UNUSED;
4755 mode = TYPE_MODE (type);
4757 if (TARGET_AAPCS_BASED)
4758 return aapcs_allocate_return_reg (mode, type, func);
4760 /* Promote integer types. */
4761 if (INTEGRAL_TYPE_P (type))
4762 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4764 /* Promote small structs returned in a register to full-word size
4765 for big-endian AAPCS. */
4766 if (arm_return_in_msb (type))
4768 HOST_WIDE_INT size = int_size_in_bytes (type);
4769 if (size % UNITS_PER_WORD != 0)
4771 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4772 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4776 return arm_libcall_value_1 (mode);
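/* For instance (a sketch): when arm_return_in_msb holds for a 3-byte
   structure, its size is rounded up to 4 and the return mode becomes
   SImode, so the data ends up in the most significant bytes of the return
   register on a big-endian target.  */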
4779 /* libcall hashtable helpers. */
4781 struct libcall_hasher : typed_noop_remove <rtx_def>
4783 typedef rtx_def value_type;
4784 typedef rtx_def compare_type;
4785 static inline hashval_t hash (const value_type *);
4786 static inline bool equal (const value_type *, const compare_type *);
4787 static inline void remove (value_type *);
4790 inline bool
4791 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4793 return rtx_equal_p (p1, p2);
4796 inline hashval_t
4797 libcall_hasher::hash (const value_type *p1)
4799 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4802 typedef hash_table<libcall_hasher> libcall_table_type;
4804 static void
4805 add_libcall (libcall_table_type *htab, rtx libcall)
4807 *htab->find_slot (libcall, INSERT) = libcall;
4810 static bool
4811 arm_libcall_uses_aapcs_base (const_rtx libcall)
4813 static bool init_done = false;
4814 static libcall_table_type *libcall_htab = NULL;
4816 if (!init_done)
4818 init_done = true;
4820 libcall_htab = new libcall_table_type (31);
4821 add_libcall (libcall_htab,
4822 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4823 add_libcall (libcall_htab,
4824 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4825 add_libcall (libcall_htab,
4826 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4827 add_libcall (libcall_htab,
4828 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4830 add_libcall (libcall_htab,
4831 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4832 add_libcall (libcall_htab,
4833 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4834 add_libcall (libcall_htab,
4835 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4836 add_libcall (libcall_htab,
4837 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4839 add_libcall (libcall_htab,
4840 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4841 add_libcall (libcall_htab,
4842 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4843 add_libcall (libcall_htab,
4844 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4845 add_libcall (libcall_htab,
4846 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4847 add_libcall (libcall_htab,
4848 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4849 add_libcall (libcall_htab,
4850 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4851 add_libcall (libcall_htab,
4852 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4853 add_libcall (libcall_htab,
4854 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4856 /* Values from double-precision helper functions are returned in core
4857 registers if the selected core only supports single-precision
4858 arithmetic, even if we are using the hard-float ABI. The same is
4859 true for single-precision helpers, but we will never be using the
4860 hard-float ABI on a CPU which doesn't support single-precision
4861 operations in hardware. */
4862 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4863 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4864 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4865 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4866 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4867 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4868 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4869 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4870 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4871 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4872 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4873 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4874 SFmode));
4875 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4876 DFmode));
4879 return libcall && libcall_htab->find (libcall) != NULL;
4882 static rtx
4883 arm_libcall_value_1 (machine_mode mode)
4885 if (TARGET_AAPCS_BASED)
4886 return aapcs_libcall_value (mode);
4887 else if (TARGET_IWMMXT_ABI
4888 && arm_vector_mode_supported_p (mode))
4889 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4890 else
4891 return gen_rtx_REG (mode, ARG_REGISTER (1));
4894 /* Define how to find the value returned by a library function
4895 assuming the value has mode MODE. */
4897 static rtx
4898 arm_libcall_value (machine_mode mode, const_rtx libcall)
4900 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4901 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4903 /* The following libcalls return their result in integer registers,
4904 even though they return a floating point value. */
4905 if (arm_libcall_uses_aapcs_base (libcall))
4906 return gen_rtx_REG (mode, ARG_REGISTER(1));
4910 return arm_libcall_value_1 (mode);
4913 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4915 static bool
4916 arm_function_value_regno_p (const unsigned int regno)
4918 if (regno == ARG_REGISTER (1)
4919 || (TARGET_32BIT
4920 && TARGET_AAPCS_BASED
4921 && TARGET_VFP
4922 && TARGET_HARD_FLOAT
4923 && regno == FIRST_VFP_REGNUM)
4924 || (TARGET_IWMMXT_ABI
4925 && regno == FIRST_IWMMXT_REGNUM))
4926 return true;
4928 return false;
4931 /* Determine the amount of memory needed to store the possible return
4932 registers of an untyped call. */
4934 arm_apply_result_size (void)
4936 int size = 16;
4938 if (TARGET_32BIT)
4940 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4941 size += 32;
4942 if (TARGET_IWMMXT_ABI)
4943 size += 8;
4946 return size;
4949 /* Decide whether TYPE should be returned in memory (true)
4950 or in a register (false). FNTYPE is the type of the function making
4951 the call. */
4952 static bool
4953 arm_return_in_memory (const_tree type, const_tree fntype)
4955 HOST_WIDE_INT size;
4957 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4959 if (TARGET_AAPCS_BASED)
4961 /* Simple, non-aggregate types (i.e. not including vectors and
4962 complex) are always returned in a register (or registers).
4963 We don't care about which register here, so we can short-cut
4964 some of the detail. */
4965 if (!AGGREGATE_TYPE_P (type)
4966 && TREE_CODE (type) != VECTOR_TYPE
4967 && TREE_CODE (type) != COMPLEX_TYPE)
4968 return false;
4970 /* Any return value that is no larger than one word can be
4971 returned in r0. */
4972 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4973 return false;
4975 /* Check any available co-processors to see if they accept the
4976 type as a register candidate (VFP, for example, can return
4977 some aggregates in consecutive registers). These aren't
4978 available if the call is variadic. */
4979 if (aapcs_select_return_coproc (type, fntype) >= 0)
4980 return false;
4982 /* Vector values should be returned using ARM registers, not
4983 memory (unless they're over 16 bytes, which will break since
4984 we only have four call-clobbered registers to play with). */
4985 if (TREE_CODE (type) == VECTOR_TYPE)
4986 return (size < 0 || size > (4 * UNITS_PER_WORD));
4988 /* The rest go in memory. */
4989 return true;
4992 if (TREE_CODE (type) == VECTOR_TYPE)
4993 return (size < 0 || size > (4 * UNITS_PER_WORD));
4995 if (!AGGREGATE_TYPE_P (type) &&
4996 (TREE_CODE (type) != VECTOR_TYPE))
4997 /* All simple types are returned in registers. */
4998 return false;
5000 if (arm_abi != ARM_ABI_APCS)
5002 /* ATPCS and later return aggregate types in memory only if they are
5003 larger than a word (or are variable size). */
5004 return (size < 0 || size > UNITS_PER_WORD);
5007 /* For the arm-wince targets we choose to be compatible with Microsoft's
5008 ARM and Thumb compilers, which always return aggregates in memory. */
5009 #ifndef ARM_WINCE
5010 /* All structures/unions bigger than one word are returned in memory.
5011 Also catch the case where int_size_in_bytes returns -1. In this case
5012 the aggregate is either huge or of variable size, and in either case
5013 we will want to return it via memory and not in a register. */
5014 if (size < 0 || size > UNITS_PER_WORD)
5015 return true;
5017 if (TREE_CODE (type) == RECORD_TYPE)
5019 tree field;
5021 /* For a struct the APCS says that we only return in a register
5022 if the type is 'integer like' and every addressable element
5023 has an offset of zero. For practical purposes this means
5024 that the structure can have at most one non bit-field element
5025 and that this element must be the first one in the structure. */
5027 /* Find the first field, ignoring non FIELD_DECL things which will
5028 have been created by C++. */
5029 for (field = TYPE_FIELDS (type);
5030 field && TREE_CODE (field) != FIELD_DECL;
5031 field = DECL_CHAIN (field))
5032 continue;
5034 if (field == NULL)
5035 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5037 /* Check that the first field is valid for returning in a register. */
5039 /* ... Floats are not allowed */
5040 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5041 return true;
5043 /* ... Aggregates that are not themselves valid for returning in
5044 a register are not allowed. */
5045 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5046 return true;
5048 /* Now check the remaining fields, if any. Only bitfields are allowed,
5049 since they are not addressable. */
5050 for (field = DECL_CHAIN (field);
5051 field;
5052 field = DECL_CHAIN (field))
5054 if (TREE_CODE (field) != FIELD_DECL)
5055 continue;
5057 if (!DECL_BIT_FIELD_TYPE (field))
5058 return true;
5061 return false;
5064 if (TREE_CODE (type) == UNION_TYPE)
5066 tree field;
5068 /* Unions can be returned in registers if every element is
5069 integral, or can be returned in an integer register. */
5070 for (field = TYPE_FIELDS (type);
5071 field;
5072 field = DECL_CHAIN (field))
5074 if (TREE_CODE (field) != FIELD_DECL)
5075 continue;
5077 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5078 return true;
5080 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5081 return true;
5084 return false;
5086 #endif /* not ARM_WINCE */
5088 /* Return all other types in memory. */
5089 return true;
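/* A few illustrative outcomes under AAPCS (a sketch): "int", "double" and
   a 4-byte struct all come back in registers; a 12-byte struct of three
   ints goes to memory; and a struct of four floats may still be returned
   in VFP registers when the hard-float VFP variant is in use, because
   aapcs_select_return_coproc accepts it as a homogeneous aggregate.  */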
5092 const struct pcs_attribute_arg
5094 const char *arg;
5095 enum arm_pcs value;
5096 } pcs_attribute_args[] =
5098 {"aapcs", ARM_PCS_AAPCS},
5099 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5100 #if 0
5101 /* We could recognize these, but changes would be needed elsewhere
5102 * to implement them. */
5103 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5104 {"atpcs", ARM_PCS_ATPCS},
5105 {"apcs", ARM_PCS_APCS},
5106 #endif
5107 {NULL, ARM_PCS_UNKNOWN}
5110 static enum arm_pcs
5111 arm_pcs_from_attribute (tree attr)
5113 const struct pcs_attribute_arg *ptr;
5114 const char *arg;
5116 /* Get the value of the argument. */
5117 if (TREE_VALUE (attr) == NULL_TREE
5118 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5119 return ARM_PCS_UNKNOWN;
5121 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5123 /* Check it against the list of known arguments. */
5124 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5125 if (streq (arg, ptr->arg))
5126 return ptr->value;
5128 /* An unrecognized PCS variant. */
5129 return ARM_PCS_UNKNOWN;
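/* For example, a declaration such as

      double f (double) __attribute__ ((pcs ("aapcs-vfp")));

   maps here to ARM_PCS_AAPCS_VFP, while an unrecognized string such as
   pcs ("fancy") yields ARM_PCS_UNKNOWN.  (Illustrative declaration only.)  */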
5132 /* Get the PCS variant to use for this call. TYPE is the function's type
5133 specification, DECL is the specific declaration. DECL may be null if
5134 the call could be indirect or if this is a library call. */
5135 static enum arm_pcs
5136 arm_get_pcs_model (const_tree type, const_tree decl)
5138 bool user_convention = false;
5139 enum arm_pcs user_pcs = arm_pcs_default;
5140 tree attr;
5142 gcc_assert (type);
5144 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5145 if (attr)
5147 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5148 user_convention = true;
5151 if (TARGET_AAPCS_BASED)
5153 /* Detect varargs functions. These always use the base rules
5154 (no argument is ever a candidate for a co-processor
5155 register). */
5156 bool base_rules = stdarg_p (type);
5158 if (user_convention)
5160 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5161 sorry ("non-AAPCS derived PCS variant");
5162 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5163 error ("variadic functions must use the base AAPCS variant");
5166 if (base_rules)
5167 return ARM_PCS_AAPCS;
5168 else if (user_convention)
5169 return user_pcs;
5170 else if (decl && flag_unit_at_a_time)
5172 /* Local functions never leak outside this compilation unit,
5173 so we are free to use whatever conventions are
5174 appropriate. */
5175 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5176 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5177 if (i && i->local)
5178 return ARM_PCS_AAPCS_LOCAL;
5181 else if (user_convention && user_pcs != arm_pcs_default)
5182 sorry ("PCS variant");
5184 /* For everything else we use the target's default. */
5185 return arm_pcs_default;
5189 static void
5190 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5191 const_tree fntype ATTRIBUTE_UNUSED,
5192 rtx libcall ATTRIBUTE_UNUSED,
5193 const_tree fndecl ATTRIBUTE_UNUSED)
5195 /* Record the unallocated VFP registers. */
5196 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5197 pcum->aapcs_vfp_reg_alloc = 0;
5200 /* Walk down the type tree of TYPE counting consecutive base elements.
5201 If *MODEP is VOIDmode, then set it to the first valid floating point
5202 type. If a non-floating point type is found, or if a floating point
5203 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5204 otherwise return the count in the sub-tree. */
5205 static int
5206 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5208 machine_mode mode;
5209 HOST_WIDE_INT size;
5211 switch (TREE_CODE (type))
5213 case REAL_TYPE:
5214 mode = TYPE_MODE (type);
5215 if (mode != DFmode && mode != SFmode)
5216 return -1;
5218 if (*modep == VOIDmode)
5219 *modep = mode;
5221 if (*modep == mode)
5222 return 1;
5224 break;
5226 case COMPLEX_TYPE:
5227 mode = TYPE_MODE (TREE_TYPE (type));
5228 if (mode != DFmode && mode != SFmode)
5229 return -1;
5231 if (*modep == VOIDmode)
5232 *modep = mode;
5234 if (*modep == mode)
5235 return 2;
5237 break;
5239 case VECTOR_TYPE:
5240 /* Use V2SImode and V4SImode as representatives of all 64-bit
5241 and 128-bit vector types, whether or not those modes are
5242 supported with the present options. */
5243 size = int_size_in_bytes (type);
5244 switch (size)
5246 case 8:
5247 mode = V2SImode;
5248 break;
5249 case 16:
5250 mode = V4SImode;
5251 break;
5252 default:
5253 return -1;
5256 if (*modep == VOIDmode)
5257 *modep = mode;
5259 /* Vector modes are considered to be opaque: two vectors are
5260 equivalent for the purposes of being homogeneous aggregates
5261 if they are the same size. */
5262 if (*modep == mode)
5263 return 1;
5265 break;
5267 case ARRAY_TYPE:
5269 int count;
5270 tree index = TYPE_DOMAIN (type);
5272 /* Can't handle incomplete types nor sizes that are not
5273 fixed. */
5274 if (!COMPLETE_TYPE_P (type)
5275 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5276 return -1;
5278 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5279 if (count == -1
5280 || !index
5281 || !TYPE_MAX_VALUE (index)
5282 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5283 || !TYPE_MIN_VALUE (index)
5284 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5285 || count < 0)
5286 return -1;
5288 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5289 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5291 /* There must be no padding. */
5292 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5293 return -1;
5295 return count;
5298 case RECORD_TYPE:
5300 int count = 0;
5301 int sub_count;
5302 tree field;
5304 /* Can't handle incomplete types nor sizes that are not
5305 fixed. */
5306 if (!COMPLETE_TYPE_P (type)
5307 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5308 return -1;
5310 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5312 if (TREE_CODE (field) != FIELD_DECL)
5313 continue;
5315 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5316 if (sub_count < 0)
5317 return -1;
5318 count += sub_count;
5321 /* There must be no padding. */
5322 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5323 return -1;
5325 return count;
5328 case UNION_TYPE:
5329 case QUAL_UNION_TYPE:
5331 /* These aren't very interesting except in a degenerate case. */
5332 int count = 0;
5333 int sub_count;
5334 tree field;
5336 /* Can't handle incomplete types nor sizes that are not
5337 fixed. */
5338 if (!COMPLETE_TYPE_P (type)
5339 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5340 return -1;
5342 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5344 if (TREE_CODE (field) != FIELD_DECL)
5345 continue;
5347 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5348 if (sub_count < 0)
5349 return -1;
5350 count = count > sub_count ? count : sub_count;
5353 /* There must be no padding. */
5354 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5355 return -1;
5357 return count;
5360 default:
5361 break;
5364 return -1;
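/* Illustrative results (a sketch): for "struct { double x, y; }" this
   returns 2 with *MODEP == DFmode (a homogeneous aggregate of two
   doubles); for "float v[4]" it returns 4 with *MODEP == SFmode; for
   "_Complex float" it returns 2; and for "struct { float f; double d; }"
   it returns -1 because the element modes do not match.  */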
5367 /* Return true if PCS_VARIANT should use VFP registers. */
5368 static bool
5369 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5371 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5373 static bool seen_thumb1_vfp = false;
5375 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5377 sorry ("Thumb-1 hard-float VFP ABI");
5378 /* sorry() is not immediately fatal, so only display this once. */
5379 seen_thumb1_vfp = true;
5382 return true;
5385 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5386 return false;
5388 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5389 (TARGET_VFP_DOUBLE || !is_double));
5392 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5393 suitable for passing or returning in VFP registers for the PCS
5394 variant selected. If it is, then *BASE_MODE is updated to contain
5395 a machine mode describing each element of the argument's type and
5396 *COUNT to hold the number of such elements. */
5397 static bool
5398 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5399 machine_mode mode, const_tree type,
5400 machine_mode *base_mode, int *count)
5402 machine_mode new_mode = VOIDmode;
5404 /* If we have the type information, prefer that to working things
5405 out from the mode. */
5406 if (type)
5408 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5410 if (ag_count > 0 && ag_count <= 4)
5411 *count = ag_count;
5412 else
5413 return false;
5415 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5416 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5417 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5419 *count = 1;
5420 new_mode = mode;
5422 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5424 *count = 2;
5425 new_mode = (mode == DCmode ? DFmode : SFmode);
5427 else
5428 return false;
5431 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5432 return false;
5434 *base_mode = new_mode;
5435 return true;
5438 static bool
5439 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5440 machine_mode mode, const_tree type)
5442 int count ATTRIBUTE_UNUSED;
5443 machine_mode ag_mode ATTRIBUTE_UNUSED;
5445 if (!use_vfp_abi (pcs_variant, false))
5446 return false;
5447 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5448 &ag_mode, &count);
5451 static bool
5452 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5453 const_tree type)
5455 if (!use_vfp_abi (pcum->pcs_variant, false))
5456 return false;
5458 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5459 &pcum->aapcs_vfp_rmode,
5460 &pcum->aapcs_vfp_rcount);
5463 static bool
5464 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5465 const_tree type ATTRIBUTE_UNUSED)
5467 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5468 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5469 int regno;
5471 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5472 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5474 pcum->aapcs_vfp_reg_alloc = mask << regno;
5475 if (mode == BLKmode
5476 || (mode == TImode && ! TARGET_NEON)
5477 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5479 int i;
5480 int rcount = pcum->aapcs_vfp_rcount;
5481 int rshift = shift;
5482 machine_mode rmode = pcum->aapcs_vfp_rmode;
5483 rtx par;
5484 if (!TARGET_NEON)
5486 /* Avoid using unsupported vector modes. */
5487 if (rmode == V2SImode)
5488 rmode = DImode;
5489 else if (rmode == V4SImode)
5491 rmode = DImode;
5492 rcount *= 2;
5493 rshift /= 2;
5496 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5497 for (i = 0; i < rcount; i++)
5499 rtx tmp = gen_rtx_REG (rmode,
5500 FIRST_VFP_REGNUM + regno + i * rshift);
5501 tmp = gen_rtx_EXPR_LIST
5502 (VOIDmode, tmp,
5503 GEN_INT (i * GET_MODE_SIZE (rmode)));
5504 XVECEXP (par, 0, i) = tmp;
5507 pcum->aapcs_reg = par;
5509 else
5510 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5511 return true;
5513 return false;
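/* Worked allocation example (a sketch): for a homogeneous aggregate of two
   doubles, aapcs_vfp_rmode is DFmode and aapcs_vfp_rcount is 2, so shift
   is 2 (DFmode/SFmode) and mask is 0xf; the loop looks for four
   consecutive free single-precision registers on a double boundary and,
   if s0-s3 (d0-d1) are free, marks them allocated and hands the argument
   back in d0/d1.  */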
5516 static rtx
5517 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5518 machine_mode mode,
5519 const_tree type ATTRIBUTE_UNUSED)
5521 if (!use_vfp_abi (pcs_variant, false))
5522 return NULL;
5524 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5526 int count;
5527 machine_mode ag_mode;
5528 int i;
5529 rtx par;
5530 int shift;
5532 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5533 &ag_mode, &count);
5535 if (!TARGET_NEON)
5537 if (ag_mode == V2SImode)
5538 ag_mode = DImode;
5539 else if (ag_mode == V4SImode)
5541 ag_mode = DImode;
5542 count *= 2;
5545 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5546 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5547 for (i = 0; i < count; i++)
5549 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5550 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5551 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5552 XVECEXP (par, 0, i) = tmp;
5555 return par;
5558 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5561 static void
5562 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5563 machine_mode mode ATTRIBUTE_UNUSED,
5564 const_tree type ATTRIBUTE_UNUSED)
5566 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5567 pcum->aapcs_vfp_reg_alloc = 0;
5568 return;
5571 #define AAPCS_CP(X) \
5573 aapcs_ ## X ## _cum_init, \
5574 aapcs_ ## X ## _is_call_candidate, \
5575 aapcs_ ## X ## _allocate, \
5576 aapcs_ ## X ## _is_return_candidate, \
5577 aapcs_ ## X ## _allocate_return_reg, \
5578 aapcs_ ## X ## _advance \
5581 /* Table of co-processors that can be used to pass arguments in
5582 registers. Ideally no argument should be a candidate for more than
5583 one co-processor table entry, but the table is processed in order
5584 and stops after the first match. If that entry then fails to put
5585 the argument into a co-processor register, the argument will go on
5586 the stack. */
5587 static struct
5589 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5590 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5592 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5593 BLKmode) is a candidate for this co-processor's registers; this
5594 function should ignore any position-dependent state in
5595 CUMULATIVE_ARGS and only use call-type dependent information. */
5596 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5598 /* Return true if the argument does get a co-processor register; it
5599 should set aapcs_reg to an RTX for the allocated register, as is
5600 required for a return from FUNCTION_ARG. */
5601 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5603 /* Return true if a result of mode MODE (or type TYPE if MODE is
5604 BLKmode) can be returned in this co-processor's registers. */
5605 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5607 /* Allocate and return an RTX element to hold the return type of a
5608 call; this routine must not fail and will only be called if
5609 is_return_candidate returned true with the same parameters. */
5610 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5612 /* Finish processing this argument and prepare to start processing
5613 the next one. */
5614 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5615 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5617 AAPCS_CP(vfp)
5620 #undef AAPCS_CP
5622 static int
5623 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5624 const_tree type)
5626 int i;
5628 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5629 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5630 return i;
5632 return -1;
5635 static int
5636 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5638 /* We aren't passed a decl, so we can't check that a call is local.
5639 However, it isn't clear that that would be a win anyway, since it
5640 might limit some tail-calling opportunities. */
5641 enum arm_pcs pcs_variant;
5643 if (fntype)
5645 const_tree fndecl = NULL_TREE;
5647 if (TREE_CODE (fntype) == FUNCTION_DECL)
5649 fndecl = fntype;
5650 fntype = TREE_TYPE (fntype);
5653 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5655 else
5656 pcs_variant = arm_pcs_default;
5658 if (pcs_variant != ARM_PCS_AAPCS)
5660 int i;
5662 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5663 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5664 TYPE_MODE (type),
5665 type))
5666 return i;
5668 return -1;
5671 static rtx
5672 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5673 const_tree fntype)
5675 /* We aren't passed a decl, so we can't check that a call is local.
5676 However, it isn't clear that that would be a win anyway, since it
5677 might limit some tail-calling opportunities. */
5678 enum arm_pcs pcs_variant;
5679 int unsignedp ATTRIBUTE_UNUSED;
5681 if (fntype)
5683 const_tree fndecl = NULL_TREE;
5685 if (TREE_CODE (fntype) == FUNCTION_DECL)
5687 fndecl = fntype;
5688 fntype = TREE_TYPE (fntype);
5691 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5693 else
5694 pcs_variant = arm_pcs_default;
5696 /* Promote integer types. */
5697 if (type && INTEGRAL_TYPE_P (type))
5698 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5700 if (pcs_variant != ARM_PCS_AAPCS)
5702 int i;
5704 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5705 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5706 type))
5707 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5708 mode, type);
5711 /* Promotes small structs returned in a register to full-word size
5712 for big-endian AAPCS. */
5713 if (type && arm_return_in_msb (type))
5715 HOST_WIDE_INT size = int_size_in_bytes (type);
5716 if (size % UNITS_PER_WORD != 0)
5718 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5719 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5723 return gen_rtx_REG (mode, R0_REGNUM);
5726 static rtx
5727 aapcs_libcall_value (machine_mode mode)
5729 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5730 && GET_MODE_SIZE (mode) <= 4)
5731 mode = SImode;
5733 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5736 /* Lay out a function argument using the AAPCS rules. The rule
5737 numbers referred to here are those in the AAPCS. */
5738 static void
5739 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5740 const_tree type, bool named)
5742 int nregs, nregs2;
5743 int ncrn;
5745 /* We only need to do this once per argument. */
5746 if (pcum->aapcs_arg_processed)
5747 return;
5749 pcum->aapcs_arg_processed = true;
5751 /* Special case: if named is false then we are handling an incoming
5752 anonymous argument which is on the stack. */
5753 if (!named)
5754 return;
5756 /* Is this a potential co-processor register candidate? */
5757 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5759 int slot = aapcs_select_call_coproc (pcum, mode, type);
5760 pcum->aapcs_cprc_slot = slot;
5762 /* We don't have to apply any of the rules from part B of the
5763 preparation phase; these are handled elsewhere in the
5764 compiler. */
5766 if (slot >= 0)
5768 /* A Co-processor register candidate goes either in its own
5769 class of registers or on the stack. */
5770 if (!pcum->aapcs_cprc_failed[slot])
5772 /* C1.cp - Try to allocate the argument to co-processor
5773 registers. */
5774 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5775 return;
5777 /* C2.cp - Put the argument on the stack and note that we
5778 can't assign any more candidates in this slot. We also
5779 need to note that we have allocated stack space, so that
5780 we won't later try to split a non-cprc candidate between
5781 core registers and the stack. */
5782 pcum->aapcs_cprc_failed[slot] = true;
5783 pcum->can_split = false;
5786 /* We didn't get a register, so this argument goes on the
5787 stack. */
5788 gcc_assert (pcum->can_split == false);
5789 return;
5793 /* C3 - For double-word aligned arguments, round the NCRN up to the
5794 next even number. */
5795 ncrn = pcum->aapcs_ncrn;
5796 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5797 ncrn++;
5799 nregs = ARM_NUM_REGS2(mode, type);
5801 /* Sigh, this test should really assert that nregs > 0, but a GCC
5803 extension allows empty structs and then gives them zero size; it
5803 then allows such a structure to be passed by value. For some of
5804 the code below we have to pretend that such an argument has
5805 non-zero size so that we 'locate' it correctly either in
5806 registers or on the stack. */
5807 gcc_assert (nregs >= 0);
5809 nregs2 = nregs ? nregs : 1;
5811 /* C4 - Argument fits entirely in core registers. */
5812 if (ncrn + nregs2 <= NUM_ARG_REGS)
5814 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5815 pcum->aapcs_next_ncrn = ncrn + nregs;
5816 return;
5819 /* C5 - Some core registers left and there are no arguments already
5820 on the stack: split this argument between the remaining core
5821 registers and the stack. */
5822 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5824 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5825 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5826 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5827 return;
5830 /* C6 - NCRN is set to 4. */
5831 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5833 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5834 return;
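/* Worked example of the rules above (a sketch, assuming the base AAPCS
   with software floating point): for

     void f (int a, double b, int c);

   A is assigned to r0 by C4 (ncrn 0 -> 1); B needs doubleword alignment,
   so C3 rounds the NCRN up to 2 and B occupies r2-r3 (ncrn -> 4); C then
   finds no core registers left, so C6-C8 place it on the stack.  */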
5837 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5838 for a call to a function whose data type is FNTYPE.
5839 For a library call, FNTYPE is NULL. */
5840 void
5841 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5842 rtx libname,
5843 tree fndecl ATTRIBUTE_UNUSED)
5845 /* Long call handling. */
5846 if (fntype)
5847 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5848 else
5849 pcum->pcs_variant = arm_pcs_default;
5851 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5853 if (arm_libcall_uses_aapcs_base (libname))
5854 pcum->pcs_variant = ARM_PCS_AAPCS;
5856 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5857 pcum->aapcs_reg = NULL_RTX;
5858 pcum->aapcs_partial = 0;
5859 pcum->aapcs_arg_processed = false;
5860 pcum->aapcs_cprc_slot = -1;
5861 pcum->can_split = true;
5863 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5865 int i;
5867 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5869 pcum->aapcs_cprc_failed[i] = false;
5870 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5873 return;
5876 /* Legacy ABIs */
5878 /* On the ARM, the offset starts at 0. */
5879 pcum->nregs = 0;
5880 pcum->iwmmxt_nregs = 0;
5881 pcum->can_split = true;
5883 /* Varargs vectors are treated the same as long long.
5884 named_count avoids having to change the way arm handles 'named'. */
5885 pcum->named_count = 0;
5886 pcum->nargs = 0;
5888 if (TARGET_REALLY_IWMMXT && fntype)
5890 tree fn_arg;
5892 for (fn_arg = TYPE_ARG_TYPES (fntype);
5893 fn_arg;
5894 fn_arg = TREE_CHAIN (fn_arg))
5895 pcum->named_count += 1;
5897 if (! pcum->named_count)
5898 pcum->named_count = INT_MAX;
5902 /* Return true if we use LRA instead of reload pass. */
5903 static bool
5904 arm_lra_p (void)
5906 return arm_lra_flag;
5909 /* Return true if mode/type need doubleword alignment. */
5910 static bool
5911 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5913 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5914 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
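/* For instance (assuming the usual 32-bit PARM_BOUNDARY), DImode and
   DFmode values, and aggregates whose declared alignment exceeds 32 bits,
   answer true here and are therefore started in an even-numbered core
   register, or at a doubleword-aligned stack slot, by the callers of this
   function.  */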
5918 /* Determine where to put an argument to a function.
5919 Value is zero to push the argument on the stack,
5920 or a hard register in which to store the argument.
5922 MODE is the argument's machine mode.
5923 TYPE is the data type of the argument (as a tree).
5924 This is null for libcalls where that information may
5925 not be available.
5926 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5927 the preceding args and about the function being called.
5928 NAMED is nonzero if this argument is a named parameter
5929 (otherwise it is an extra parameter matching an ellipsis).
5931 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5932 other arguments are passed on the stack. If (NAMED == 0) (which happens
5933 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5934 defined), say it is passed on the stack (function_prologue will
5935 indeed make it pass on the stack if necessary). */
5937 static rtx
5938 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5939 const_tree type, bool named)
5941 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5942 int nregs;
5944 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5945 a call insn (op3 of a call_value insn). */
5946 if (mode == VOIDmode)
5947 return const0_rtx;
5949 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5951 aapcs_layout_arg (pcum, mode, type, named);
5952 return pcum->aapcs_reg;
5955 /* Varargs vectors are treated the same as long long.
5956 named_count avoids having to change the way arm handles 'named'. */
5957 if (TARGET_IWMMXT_ABI
5958 && arm_vector_mode_supported_p (mode)
5959 && pcum->named_count > pcum->nargs + 1)
5961 if (pcum->iwmmxt_nregs <= 9)
5962 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5963 else
5965 pcum->can_split = false;
5966 return NULL_RTX;
5970 /* Put doubleword aligned quantities in even register pairs. */
5971 if (pcum->nregs & 1
5972 && ARM_DOUBLEWORD_ALIGN
5973 && arm_needs_doubleword_align (mode, type))
5974 pcum->nregs++;
5976 /* Only allow splitting an arg between regs and memory if all preceding
5977 args were allocated to regs. For args passed by reference we only count
5978 the reference pointer. */
5979 if (pcum->can_split)
5980 nregs = 1;
5981 else
5982 nregs = ARM_NUM_REGS2 (mode, type);
5984 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5985 return NULL_RTX;
5987 return gen_rtx_REG (mode, pcum->nregs);
5990 static unsigned int
5991 arm_function_arg_boundary (machine_mode mode, const_tree type)
5993 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5994 ? DOUBLEWORD_ALIGNMENT
5995 : PARM_BOUNDARY);
5998 static int
5999 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6000 tree type, bool named)
6002 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6003 int nregs = pcum->nregs;
6005 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6007 aapcs_layout_arg (pcum, mode, type, named);
6008 return pcum->aapcs_partial;
6011 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6012 return 0;
6014 if (NUM_ARG_REGS > nregs
6015 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6016 && pcum->can_split)
6017 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6019 return 0;
6022 /* Update the data in PCUM to advance over an argument
6023 of mode MODE and data type TYPE.
6024 (TYPE is null for libcalls where that information may not be available.) */
6026 static void
6027 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6028 const_tree type, bool named)
6030 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6032 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6034 aapcs_layout_arg (pcum, mode, type, named);
6036 if (pcum->aapcs_cprc_slot >= 0)
6038 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6039 type);
6040 pcum->aapcs_cprc_slot = -1;
6043 /* Generic stuff. */
6044 pcum->aapcs_arg_processed = false;
6045 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6046 pcum->aapcs_reg = NULL_RTX;
6047 pcum->aapcs_partial = 0;
6049 else
6051 pcum->nargs += 1;
6052 if (arm_vector_mode_supported_p (mode)
6053 && pcum->named_count > pcum->nargs
6054 && TARGET_IWMMXT_ABI)
6055 pcum->iwmmxt_nregs += 1;
6056 else
6057 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6061 /* Variable sized types are passed by reference. This is a GCC
6062 extension to the ARM ABI. */
6064 static bool
6065 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6066 machine_mode mode ATTRIBUTE_UNUSED,
6067 const_tree type, bool named ATTRIBUTE_UNUSED)
6069 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6072 /* Encode the current state of the #pragma [no_]long_calls. */
6073 typedef enum
6075 OFF, /* No #pragma [no_]long_calls is in effect. */
6076 LONG, /* #pragma long_calls is in effect. */
6077 SHORT /* #pragma no_long_calls is in effect. */
6078 } arm_pragma_enum;
6080 static arm_pragma_enum arm_pragma_long_calls = OFF;
6082 void
6083 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6085 arm_pragma_long_calls = LONG;
6088 void
6089 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6091 arm_pragma_long_calls = SHORT;
6094 void
6095 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6097 arm_pragma_long_calls = OFF;
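/* A usage sketch (user source, not compiler code):

     #pragma long_calls
     extern void far_away (void);   // gets the long_call attribute
     #pragma no_long_calls
     extern void nearby (void);     // gets the short_call attribute
     #pragma long_calls_off         // back to the command-line default

   arm_set_default_type_attributes below attaches the corresponding
   attribute to each function type declared while a pragma is active.  */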
6100 /* Handle an attribute requiring a FUNCTION_DECL;
6101 arguments as in struct attribute_spec.handler. */
6102 static tree
6103 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6104 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6106 if (TREE_CODE (*node) != FUNCTION_DECL)
6108 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6109 name);
6110 *no_add_attrs = true;
6113 return NULL_TREE;
6116 /* Handle an "interrupt" or "isr" attribute;
6117 arguments as in struct attribute_spec.handler. */
6118 static tree
6119 arm_handle_isr_decl_attribute (tree *node, tree name, tree args, int flags,
6120 bool *no_add_attrs)
6122 if (TREE_CODE (*node) != FUNCTION_DECL)
6124 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6125 name);
6126 *no_add_attrs = true;
6128 /* FIXME: the argument if any is checked for type attributes;
6129 should it be checked for decl ones? */
6131 return NULL_TREE;
6134 static tree
6135 arm_handle_isr_type_attribute (tree *node, tree name, tree args, int flags,
6136 bool *no_add_attrs)
6138 if (TREE_CODE (*node) == FUNCTION_TYPE
6139 || TREE_CODE (*node) == METHOD_TYPE)
6141 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6143 warning (OPT_Wattributes, "%qE attribute ignored",
6144 name);
6145 *no_add_attrs = true;
6148 else if (TREE_CODE (*node) == POINTER_TYPE
6149 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6150 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6151 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6153 *node = build_variant_type_copy (*node);
6154 TREE_TYPE (*node) = build_type_attribute_variant
6155 (TREE_TYPE (*node),
6156 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6157 *no_add_attrs = true;
6159 else
6161 /* Possibly pass this attribute on from the type to a decl. */
6162 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6163 | (int) ATTR_FLAG_FUNCTION_NEXT
6164 | (int) ATTR_FLAG_ARRAY_NEXT))
6166 *no_add_attrs = true;
6167 return tree_cons (name, args, NULL_TREE);
6169 else
6171 warning (OPT_Wattributes, "%qE attribute ignored",
6172 name);
6176 return NULL_TREE;
6179 /* Handle a "pcs" attribute; arguments as in struct
6180 attribute_spec.handler. */
6181 static tree
6182 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6183 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6185 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6187 warning (OPT_Wattributes, "%qE attribute ignored", name);
6188 *no_add_attrs = true;
6190 return NULL_TREE;
6193 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6194 /* Handle the "notshared" attribute. This attribute is another way of
6195 requesting hidden visibility. ARM's compiler supports
6196 "__declspec(notshared)"; we support the same thing via an
6197 attribute. */
6199 static tree
6200 arm_handle_notshared_attribute (tree *node,
6201 tree name ATTRIBUTE_UNUSED,
6202 tree args ATTRIBUTE_UNUSED,
6203 int flags ATTRIBUTE_UNUSED,
6204 bool *no_add_attrs)
6206 tree decl = TYPE_NAME (*node);
6208 if (decl)
6210 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6211 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6212 *no_add_attrs = false;
6214 return NULL_TREE;
6216 #endif
6218 /* Return 0 if the attributes for two types are incompatible, 1 if they
6219 are compatible, and 2 if they are nearly compatible (which causes a
6220 warning to be generated). */
6221 static int
6222 arm_comp_type_attributes (const_tree type1, const_tree type2)
6224 int l1, l2, s1, s2;
6226 /* Check for mismatch of non-default calling convention. */
6227 if (TREE_CODE (type1) != FUNCTION_TYPE)
6228 return 1;
6230 /* Check for mismatched call attributes. */
6231 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6232 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6233 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6234 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6236 /* Only bother to check if an attribute is defined. */
6237 if (l1 | l2 | s1 | s2)
6239 /* If one type has an attribute, the other must have the same attribute. */
6240 if ((l1 != l2) || (s1 != s2))
6241 return 0;
6243 /* Disallow mixed attributes. */
6244 if ((l1 & s2) || (l2 & s1))
6245 return 0;
6248 /* Check for mismatched ISR attribute. */
6249 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6250 if (! l1)
6251 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6252 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6253 if (! l2)
6254 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6255 if (l1 != l2)
6256 return 0;
6258 return 1;
6261 /* Assigns default attributes to newly defined type. This is used to
6262 set short_call/long_call attributes for function types of
6263 functions defined inside corresponding #pragma scopes. */
6264 static void
6265 arm_set_default_type_attributes (tree type)
6267 /* Add __attribute__ ((long_call)) to all functions when inside
6268 #pragma long_calls, or __attribute__ ((short_call)) when inside
6269 #pragma no_long_calls. */
6270 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6272 tree type_attr_list, attr_name;
6273 type_attr_list = TYPE_ATTRIBUTES (type);
6275 if (arm_pragma_long_calls == LONG)
6276 attr_name = get_identifier ("long_call");
6277 else if (arm_pragma_long_calls == SHORT)
6278 attr_name = get_identifier ("short_call");
6279 else
6280 return;
6282 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6283 TYPE_ATTRIBUTES (type) = type_attr_list;
6287 /* Return true if DECL is known to be linked into section SECTION. */
6289 static bool
6290 arm_function_in_section_p (tree decl, section *section)
6292 /* We can only be certain about functions defined in the same
6293 compilation unit. */
6294 if (!TREE_STATIC (decl))
6295 return false;
6297 /* Make sure that SYMBOL always binds to the definition in this
6298 compilation unit. */
6299 if (!targetm.binds_local_p (decl))
6300 return false;
6302 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6303 if (!DECL_SECTION_NAME (decl))
6305 /* Make sure that we will not create a unique section for DECL. */
6306 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6307 return false;
6310 return function_section (decl) == section;
6313 /* Return nonzero if a 32-bit "long_call" should be generated for
6314 a call from the current function to DECL. We generate a long_call
6315 if the function:
6317 a. has an __attribute__ ((long_call))
6318 or b. is within the scope of a #pragma long_calls
6319 or c. the -mlong-calls command line switch has been specified
6321 However we do not generate a long call if the function:
6323 d. has an __attribute__ ((short_call))
6324 or e. is inside the scope of a #pragma no_long_calls
6325 or f. is defined in the same section as the current function. */
6327 bool
6328 arm_is_long_call_p (tree decl)
6330 tree attrs;
6332 if (!decl)
6333 return TARGET_LONG_CALLS;
6335 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6336 if (lookup_attribute ("short_call", attrs))
6337 return false;
6339 /* For "f", be conservative, and only cater for cases in which the
6340 whole of the current function is placed in the same section. */
6341 if (!flag_reorder_blocks_and_partition
6342 && TREE_CODE (decl) == FUNCTION_DECL
6343 && arm_function_in_section_p (decl, current_function_section ()))
6344 return false;
6346 if (lookup_attribute ("long_call", attrs))
6347 return true;
6349 return TARGET_LONG_CALLS;
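/* For example (hypothetical user declarations):

     void slow_path (void) __attribute__ ((long_call));   // rule a: always long
     void fast_path (void) __attribute__ ((short_call));  // rule d: never long

   and with -mlong-calls any other external call also answers true here,
   unless rule f applies because the callee is known to live in the same
   section as the caller.  */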
6352 /* Return nonzero if it is ok to make a tail-call to DECL. */
6353 static bool
6354 arm_function_ok_for_sibcall (tree decl, tree exp)
6356 unsigned long func_type;
6358 if (cfun->machine->sibcall_blocked)
6359 return false;
6361 /* Never tailcall something if we are generating code for Thumb-1. */
6362 if (TARGET_THUMB1)
6363 return false;
6365 /* The PIC register is live on entry to VxWorks PLT entries, so we
6366 must make the call before restoring the PIC register. */
6367 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6368 return false;
6370 /* If we are interworking and the function is not declared static
6371 then we can't tail-call it unless we know that it exists in this
6372 compilation unit (since it might be a Thumb routine). */
6373 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6374 && !TREE_ASM_WRITTEN (decl))
6375 return false;
6377 func_type = arm_current_func_type ();
6378 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6379 if (IS_INTERRUPT (func_type))
6380 return false;
6382 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6384 /* Check that the return value locations are the same. For
6385 example that we aren't returning a value from the sibling in
6386 a VFP register but then need to transfer it to a core
6387 register. */
6388 rtx a, b;
6390 a = arm_function_value (TREE_TYPE (exp), decl, false);
6391 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6392 cfun->decl, false);
6393 if (!rtx_equal_p (a, b))
6394 return false;
6397 /* Never tailcall if function may be called with a misaligned SP. */
6398 if (IS_STACKALIGN (func_type))
6399 return false;
6401 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6402 references should become a NOP. Don't convert such calls into
6403 sibling calls. */
6404 if (TARGET_AAPCS_BASED
6405 && arm_abi == ARM_ABI_AAPCS
6406 && decl
6407 && DECL_WEAK (decl))
6408 return false;
6410 /* Everything else is ok. */
6411 return true;
6415 /* Addressing mode support functions. */
6417 /* Return nonzero if X is a legitimate immediate operand when compiling
6418 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6420 legitimate_pic_operand_p (rtx x)
6422 if (GET_CODE (x) == SYMBOL_REF
6423 || (GET_CODE (x) == CONST
6424 && GET_CODE (XEXP (x, 0)) == PLUS
6425 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6426 return 0;
6428 return 1;
6431 /* Record that the current function needs a PIC register. Initialize
6432 cfun->machine->pic_reg if we have not already done so. */
6434 static void
6435 require_pic_register (void)
6437 /* A lot of the logic here is made obscure by the fact that this
6438 routine gets called as part of the rtx cost estimation process.
6439 We don't want those calls to affect any assumptions about the real
6440 function; and further, we can't call entry_of_function() until we
6441 start the real expansion process. */
6442 if (!crtl->uses_pic_offset_table)
6444 gcc_assert (can_create_pseudo_p ());
6445 if (arm_pic_register != INVALID_REGNUM
6446 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6448 if (!cfun->machine->pic_reg)
6449 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6451 /* Play games to avoid marking the function as needing pic
6452 if we are being called as part of the cost-estimation
6453 process. */
6454 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6455 crtl->uses_pic_offset_table = 1;
6457 else
6459 rtx_insn *seq, *insn;
6461 if (!cfun->machine->pic_reg)
6462 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6464 /* Play games to avoid marking the function as needing pic
6465 if we are being called as part of the cost-estimation
6466 process. */
6467 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6469 crtl->uses_pic_offset_table = 1;
6470 start_sequence ();
6472 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6473 && arm_pic_register > LAST_LO_REGNUM)
6474 emit_move_insn (cfun->machine->pic_reg,
6475 gen_rtx_REG (Pmode, arm_pic_register));
6476 else
6477 arm_load_pic_register (0UL);
6479 seq = get_insns ();
6480 end_sequence ();
6482 for (insn = seq; insn; insn = NEXT_INSN (insn))
6483 if (INSN_P (insn))
6484 INSN_LOCATION (insn) = prologue_location;
6486 /* We can be called during expansion of PHI nodes, where
6487 we can't yet emit instructions directly in the final
6488 insn stream. Queue the insns on the entry edge, they will
6489 be committed after everything else is expanded. */
6490 insert_insn_on_edge (seq,
6491 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6498 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6500 if (GET_CODE (orig) == SYMBOL_REF
6501 || GET_CODE (orig) == LABEL_REF)
6503 rtx insn;
6505 if (reg == 0)
6507 gcc_assert (can_create_pseudo_p ());
6508 reg = gen_reg_rtx (Pmode);
6511 /* VxWorks does not impose a fixed gap between segments; the run-time
6512 gap can be different from the object-file gap. We therefore can't
6513 use GOTOFF unless we are absolutely sure that the symbol is in the
6514 same segment as the GOT. Unfortunately, the flexibility of linker
6515 scripts means that we can't be sure of that in general, so assume
6516 that GOTOFF is never valid on VxWorks. */
6517 if ((GET_CODE (orig) == LABEL_REF
6518 || (GET_CODE (orig) == SYMBOL_REF &&
6519 SYMBOL_REF_LOCAL_P (orig)))
6520 && NEED_GOT_RELOC
6521 && arm_pic_data_is_text_relative)
6522 insn = arm_pic_static_addr (orig, reg);
6523 else
6525 rtx pat;
6526 rtx mem;
6528 /* If this function doesn't have a pic register, create one now. */
6529 require_pic_register ();
6531 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6533 /* Make the MEM as close to a constant as possible. */
6534 mem = SET_SRC (pat);
6535 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6536 MEM_READONLY_P (mem) = 1;
6537 MEM_NOTRAP_P (mem) = 1;
6539 insn = emit_insn (pat);
6542 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6543 by loop. */
6544 set_unique_reg_note (insn, REG_EQUAL, orig);
6546 return reg;
6548 else if (GET_CODE (orig) == CONST)
6550 rtx base, offset;
6552 if (GET_CODE (XEXP (orig, 0)) == PLUS
6553 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6554 return orig;
6556 /* Handle the case where we have: const (UNSPEC_TLS). */
6557 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6558 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6559 return orig;
6561 /* Handle the case where we have:
6562 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6563 CONST_INT. */
6564 if (GET_CODE (XEXP (orig, 0)) == PLUS
6565 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6566 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6568 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6569 return orig;
6572 if (reg == 0)
6574 gcc_assert (can_create_pseudo_p ());
6575 reg = gen_reg_rtx (Pmode);
6578 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6580 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6581 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6582 base == reg ? 0 : reg);
6584 if (CONST_INT_P (offset))
6586 /* The base register doesn't really matter, we only want to
6587 test the index for the appropriate mode. */
6588 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6590 gcc_assert (can_create_pseudo_p ());
6591 offset = force_reg (Pmode, offset);
6594 if (CONST_INT_P (offset))
6595 return plus_constant (Pmode, base, INTVAL (offset));
6598 if (GET_MODE_SIZE (mode) > 4
6599 && (GET_MODE_CLASS (mode) == MODE_INT
6600 || TARGET_SOFT_FLOAT))
6602 emit_insn (gen_addsi3 (reg, base, offset));
6603 return reg;
6606 return gen_rtx_PLUS (Pmode, base, offset);
6609 return orig;
6613 /* Find a spare register to use during the prolog of a function. */
6615 static int
6616 thumb_find_work_register (unsigned long pushed_regs_mask)
6618 int reg;
6620 /* Check the argument registers first as these are call-used. The
6621 register allocation order means that sometimes r3 might be used
6622 but earlier argument registers might not, so check them all. */
6623 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6624 if (!df_regs_ever_live_p (reg))
6625 return reg;
6627 /* Before going on to check the call-saved registers we can try a couple
6628 more ways of deducing that r3 is available. The first is when we are
6629 pushing anonymous arguments onto the stack and we have less than 4
6630 registers worth of fixed arguments(*). In this case r3 will be part of
6631 the variable argument list and so we can be sure that it will be
6632 pushed right at the start of the function. Hence it will be available
6633 for the rest of the prologue.
6634 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6635 if (cfun->machine->uses_anonymous_args
6636 && crtl->args.pretend_args_size > 0)
6637 return LAST_ARG_REGNUM;
6639 /* The other case is when we have fixed arguments but less than 4 registers
6640 worth. In this case r3 might be used in the body of the function, but
6641 it is not being used to convey an argument into the function. In theory
6642 we could just check crtl->args.size to see how many bytes are
6643 being passed in argument registers, but it seems that it is unreliable.
6644 Sometimes it will have the value 0 when in fact arguments are being
6645 passed. (See testcase execute/20021111-1.c for an example). So we also
6646 check the args_info.nregs field as well. The problem with this field is
6647 that it makes no allowances for arguments that are passed to the
6648 function but which are not used. Hence we could miss an opportunity
6649 when a function has an unused argument in r3. But it is better to be
6650 safe than to be sorry. */
6651 if (! cfun->machine->uses_anonymous_args
6652 && crtl->args.size >= 0
6653 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6654 && (TARGET_AAPCS_BASED
6655 ? crtl->args.info.aapcs_ncrn < 4
6656 : crtl->args.info.nregs < 4))
6657 return LAST_ARG_REGNUM;
6659 /* Otherwise look for a call-saved register that is going to be pushed. */
6660 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6661 if (pushed_regs_mask & (1 << reg))
6662 return reg;
6664 if (TARGET_THUMB2)
6666 /* Thumb-2 can use high regs. */
6667 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6668 if (pushed_regs_mask & (1 << reg))
6669 return reg;
6671 /* Something went wrong - thumb_compute_save_reg_mask()
6672 should have arranged for a suitable register to be pushed. */
6673 gcc_unreachable ();
6676 static GTY(()) int pic_labelno;
6678 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6679 low register. */
6681 void
6682 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6684 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6686 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6687 return;
6689 gcc_assert (flag_pic);
6691 pic_reg = cfun->machine->pic_reg;
6692 if (TARGET_VXWORKS_RTP)
6694 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6695 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6696 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6698 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6700 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6701 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6703 else
6705 /* We use an UNSPEC rather than a LABEL_REF because this label
6706 never appears in the code stream. */
6708 labelno = GEN_INT (pic_labelno++);
6709 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6710 l1 = gen_rtx_CONST (VOIDmode, l1);
6712 /* On the ARM the PC register contains 'dot + 8' at the time of the
6713 addition, on the Thumb it is 'dot + 4'. */
6714 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6715 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6716 UNSPEC_GOTSYM_OFF);
6717 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6719 if (TARGET_32BIT)
6721 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6723 else /* TARGET_THUMB1 */
6725 if (arm_pic_register != INVALID_REGNUM
6726 && REGNO (pic_reg) > LAST_LO_REGNUM)
6728 /* We will have pushed the pic register, so we should always be
6729 able to find a work register. */
6730 pic_tmp = gen_rtx_REG (SImode,
6731 thumb_find_work_register (saved_regs));
6732 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6733 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6734 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6736 else if (arm_pic_register != INVALID_REGNUM
6737 && arm_pic_register > LAST_LO_REGNUM
6738 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6740 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6741 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6742 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6744 else
6745 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6749 /* Need to emit this whether or not we obey regdecls,
6750 since setjmp/longjmp can cause life info to screw up. */
6751 emit_use (pic_reg);
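/* A rough sketch of the ARM-state sequence the non-VxWorks path above
   expands to (register names and labels are illustrative only):

       ldr     rN, .LPIC_off
   .LPICn:
       add     rN, pc, rN
       ...
   .LPIC_off:
       .word   _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)

   The '+ 8' matches the 'dot + 8' PC bias noted above; Thumb code uses
   '+ 4' instead.  */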
6754 /* Generate code to load the address of a static var when flag_pic is set. */
6755 static rtx
6756 arm_pic_static_addr (rtx orig, rtx reg)
6758 rtx l1, labelno, offset_rtx, insn;
6760 gcc_assert (flag_pic);
6762 /* We use an UNSPEC rather than a LABEL_REF because this label
6763 never appears in the code stream. */
6764 labelno = GEN_INT (pic_labelno++);
6765 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6766 l1 = gen_rtx_CONST (VOIDmode, l1);
6768 /* On the ARM the PC register contains 'dot + 8' at the time of the
6769 addition, on the Thumb it is 'dot + 4'. */
6770 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6771 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6772 UNSPEC_SYMBOL_OFFSET);
6773 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6775 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6776 return insn;
6779 /* Return nonzero if X is valid as an ARM state addressing register. */
6780 static int
6781 arm_address_register_rtx_p (rtx x, int strict_p)
6783 int regno;
6785 if (!REG_P (x))
6786 return 0;
6788 regno = REGNO (x);
6790 if (strict_p)
6791 return ARM_REGNO_OK_FOR_BASE_P (regno);
6793 return (regno <= LAST_ARM_REGNUM
6794 || regno >= FIRST_PSEUDO_REGISTER
6795 || regno == FRAME_POINTER_REGNUM
6796 || regno == ARG_POINTER_REGNUM);
6799 /* Return TRUE if this rtx is the difference of a symbol and a label,
6800 and will reduce to a PC-relative relocation in the object file.
6801 Expressions like this can be left alone when generating PIC, rather
6802 than forced through the GOT. */
6803 static int
6804 pcrel_constant_p (rtx x)
6806 if (GET_CODE (x) == MINUS)
6807 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6809 return FALSE;
6812 /* Return true if X will surely end up in an index register after next
6813 splitting pass. */
6814 static bool
6815 will_be_in_index_register (const_rtx x)
6817 /* arm.md: calculate_pic_address will split this into a register. */
6818 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6821 /* Return nonzero if X is a valid ARM state address operand. */
6823 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6824 int strict_p)
6826 bool use_ldrd;
6827 enum rtx_code code = GET_CODE (x);
6829 if (arm_address_register_rtx_p (x, strict_p))
6830 return 1;
6832 use_ldrd = (TARGET_LDRD
6833 && (mode == DImode
6834 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6836 if (code == POST_INC || code == PRE_DEC
6837 || ((code == PRE_INC || code == POST_DEC)
6838 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6839 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6841 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6842 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6843 && GET_CODE (XEXP (x, 1)) == PLUS
6844 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6846 rtx addend = XEXP (XEXP (x, 1), 1);
6848 /* Don't allow ldrd post increment by register because it's hard
6849 to fixup invalid register choices. */
6850 if (use_ldrd
6851 && GET_CODE (x) == POST_MODIFY
6852 && REG_P (addend))
6853 return 0;
6855 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6856 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6859 /* After reload constants split into minipools will have addresses
6860 from a LABEL_REF. */
6861 else if (reload_completed
6862 && (code == LABEL_REF
6863 || (code == CONST
6864 && GET_CODE (XEXP (x, 0)) == PLUS
6865 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6866 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6867 return 1;
6869 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6870 return 0;
6872 else if (code == PLUS)
6874 rtx xop0 = XEXP (x, 0);
6875 rtx xop1 = XEXP (x, 1);
6877 return ((arm_address_register_rtx_p (xop0, strict_p)
6878 && ((CONST_INT_P (xop1)
6879 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6880 || (!strict_p && will_be_in_index_register (xop1))))
6881 || (arm_address_register_rtx_p (xop1, strict_p)
6882 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6885 #if 0
6886 /* Reload currently can't handle MINUS, so disable this for now */
6887 else if (GET_CODE (x) == MINUS)
6889 rtx xop0 = XEXP (x, 0);
6890 rtx xop1 = XEXP (x, 1);
6892 return (arm_address_register_rtx_p (xop0, strict_p)
6893 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6895 #endif
6897 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6898 && code == SYMBOL_REF
6899 && CONSTANT_POOL_ADDRESS_P (x)
6900 && ! (flag_pic
6901 && symbol_mentioned_p (get_pool_constant (x))
6902 && ! pcrel_constant_p (get_pool_constant (x))))
6903 return 1;
6905 return 0;
6908 /* Return nonzero if X is a valid Thumb-2 address operand. */
6909 static int
6910 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6912 bool use_ldrd;
6913 enum rtx_code code = GET_CODE (x);
6915 if (arm_address_register_rtx_p (x, strict_p))
6916 return 1;
6918 use_ldrd = (TARGET_LDRD
6919 && (mode == DImode
6920 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6922 if (code == POST_INC || code == PRE_DEC
6923 || ((code == PRE_INC || code == POST_DEC)
6924 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6925 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6927 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6928 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6929 && GET_CODE (XEXP (x, 1)) == PLUS
6930 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6932 /* Thumb-2 only has autoincrement by constant. */
6933 rtx addend = XEXP (XEXP (x, 1), 1);
6934 HOST_WIDE_INT offset;
6936 if (!CONST_INT_P (addend))
6937 return 0;
6939 offset = INTVAL(addend);
6940 if (GET_MODE_SIZE (mode) <= 4)
6941 return (offset > -256 && offset < 256);
6943 return (use_ldrd && offset > -1024 && offset < 1024
6944 && (offset & 3) == 0);
6947 /* After reload constants split into minipools will have addresses
6948 from a LABEL_REF. */
6949 else if (reload_completed
6950 && (code == LABEL_REF
6951 || (code == CONST
6952 && GET_CODE (XEXP (x, 0)) == PLUS
6953 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6954 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6955 return 1;
6957 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6958 return 0;
6960 else if (code == PLUS)
6962 rtx xop0 = XEXP (x, 0);
6963 rtx xop1 = XEXP (x, 1);
6965 return ((arm_address_register_rtx_p (xop0, strict_p)
6966 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6967 || (!strict_p && will_be_in_index_register (xop1))))
6968 || (arm_address_register_rtx_p (xop1, strict_p)
6969 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6972 /* Normally we can assign constant values to target registers without
6973 the help of a constant pool. But there are cases where we have to use
6974 a constant pool, such as:
6975 1) assigning a label to a register;
6976 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6978 Constant pool access in format:
6979 (set (reg r0) (mem (symbol_ref (".LC0"))))
6980 will cause the use of a literal pool (later, in function arm_reorg).
6981 So here we mark such a format as invalid, so that the compiler
6982 will adjust it into:
6983 (set (reg r0) (symbol_ref (".LC0")))
6984 (set (reg r0) (mem (reg r0))).
6985 No extra register is required, and (mem (reg r0)) won't cause the use
6986 of literal pools. */
6987 else if (arm_disable_literal_pool && code == SYMBOL_REF
6988 && CONSTANT_POOL_ADDRESS_P (x))
6989 return 0;
6991 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6992 && code == SYMBOL_REF
6993 && CONSTANT_POOL_ADDRESS_P (x)
6994 && ! (flag_pic
6995 && symbol_mentioned_p (get_pool_constant (x))
6996 && ! pcrel_constant_p (get_pool_constant (x))))
6997 return 1;
6999 return 0;
7002 /* Return nonzero if INDEX is valid for an address index operand in
7003 ARM state. */
7004 static int
7005 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7006 int strict_p)
7008 HOST_WIDE_INT range;
7009 enum rtx_code code = GET_CODE (index);
7011 /* Standard coprocessor addressing modes. */
7012 if (TARGET_HARD_FLOAT
7013 && TARGET_VFP
7014 && (mode == SFmode || mode == DFmode))
7015 return (code == CONST_INT && INTVAL (index) < 1024
7016 && INTVAL (index) > -1024
7017 && (INTVAL (index) & 3) == 0);
7019 /* For quad modes, we restrict the constant offset to be slightly less
7020 than what the instruction format permits. We do this because for
7021 quad mode moves, we will actually decompose them into two separate
7022 double-mode reads or writes. INDEX must therefore be a valid
7023 (double-mode) offset and so should INDEX+8. */
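/* (For instance, with the bound below the largest quad-word offset
   accepted is 1012, so the second half of the access at 1012 + 8 = 1020
   still fits the double-word range of up to 1020.)  */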
7024 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7025 return (code == CONST_INT
7026 && INTVAL (index) < 1016
7027 && INTVAL (index) > -1024
7028 && (INTVAL (index) & 3) == 0);
7030 /* We have no such constraint on double mode offsets, so we permit the
7031 full range of the instruction format. */
7032 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7033 return (code == CONST_INT
7034 && INTVAL (index) < 1024
7035 && INTVAL (index) > -1024
7036 && (INTVAL (index) & 3) == 0);
7038 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7039 return (code == CONST_INT
7040 && INTVAL (index) < 1024
7041 && INTVAL (index) > -1024
7042 && (INTVAL (index) & 3) == 0);
7044 if (arm_address_register_rtx_p (index, strict_p)
7045 && (GET_MODE_SIZE (mode) <= 4))
7046 return 1;
7048 if (mode == DImode || mode == DFmode)
7050 if (code == CONST_INT)
7052 HOST_WIDE_INT val = INTVAL (index);
7054 if (TARGET_LDRD)
7055 return val > -256 && val < 256;
7056 else
7057 return val > -4096 && val < 4092;
7060 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7063 if (GET_MODE_SIZE (mode) <= 4
7064 && ! (arm_arch4
7065 && (mode == HImode
7066 || mode == HFmode
7067 || (mode == QImode && outer == SIGN_EXTEND))))
7069 if (code == MULT)
7071 rtx xiop0 = XEXP (index, 0);
7072 rtx xiop1 = XEXP (index, 1);
7074 return ((arm_address_register_rtx_p (xiop0, strict_p)
7075 && power_of_two_operand (xiop1, SImode))
7076 || (arm_address_register_rtx_p (xiop1, strict_p)
7077 && power_of_two_operand (xiop0, SImode)));
7079 else if (code == LSHIFTRT || code == ASHIFTRT
7080 || code == ASHIFT || code == ROTATERT)
7082 rtx op = XEXP (index, 1);
7084 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7085 && CONST_INT_P (op)
7086 && INTVAL (op) > 0
7087 && INTVAL (op) <= 31);
7091 /* For ARM v4 we may be doing a sign-extend operation during the
7092 load. */
7093 if (arm_arch4)
7095 if (mode == HImode
7096 || mode == HFmode
7097 || (outer == SIGN_EXTEND && mode == QImode))
7098 range = 256;
7099 else
7100 range = 4096;
7102 else
7103 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7105 return (code == CONST_INT
7106 && INTVAL (index) < range
7107 && INTVAL (index) > -range);
7110 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7111 index operand. i.e. 1, 2, 4 or 8. */
7112 static bool
7113 thumb2_index_mul_operand (rtx op)
7115 HOST_WIDE_INT val;
7117 if (!CONST_INT_P (op))
7118 return false;
7120 val = INTVAL(op);
7121 return (val == 1 || val == 2 || val == 4 || val == 8);
7124 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7125 static int
7126 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7128 enum rtx_code code = GET_CODE (index);
7130 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7131 /* Standard coprocessor addressing modes. */
7132 if (TARGET_HARD_FLOAT
7133 && TARGET_VFP
7134 && (mode == SFmode || mode == DFmode))
7135 return (code == CONST_INT && INTVAL (index) < 1024
7136 /* Thumb-2 allows only > -256 index range for its core register
7137 load/stores. Since we allow SF/DF in core registers, we have
7138 to use the intersection between -256~4096 (core) and -1024~1024
7139 (coprocessor). */
7140 && INTVAL (index) > -256
7141 && (INTVAL (index) & 3) == 0);
7143 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7145 /* For DImode assume values will usually live in core regs
7146 and only allow LDRD addressing modes. */
7147 if (!TARGET_LDRD || mode != DImode)
7148 return (code == CONST_INT
7149 && INTVAL (index) < 1024
7150 && INTVAL (index) > -1024
7151 && (INTVAL (index) & 3) == 0);
7154 /* For quad modes, we restrict the constant offset to be slightly less
7155 than what the instruction format permits. We do this because for
7156 quad mode moves, we will actually decompose them into two separate
7157 double-mode reads or writes. INDEX must therefore be a valid
7158 (double-mode) offset and so should INDEX+8. */
7159 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7160 return (code == CONST_INT
7161 && INTVAL (index) < 1016
7162 && INTVAL (index) > -1024
7163 && (INTVAL (index) & 3) == 0);
7165 /* We have no such constraint on double mode offsets, so we permit the
7166 full range of the instruction format. */
7167 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7168 return (code == CONST_INT
7169 && INTVAL (index) < 1024
7170 && INTVAL (index) > -1024
7171 && (INTVAL (index) & 3) == 0);
7173 if (arm_address_register_rtx_p (index, strict_p)
7174 && (GET_MODE_SIZE (mode) <= 4))
7175 return 1;
7177 if (mode == DImode || mode == DFmode)
7179 if (code == CONST_INT)
7181 HOST_WIDE_INT val = INTVAL (index);
7182 /* ??? Can we assume ldrd for thumb2? */
7183 /* Thumb-2 ldrd only has reg+const addressing modes. */
7184 /* ldrd supports offsets of +-1020.
7185 However the ldr fallback does not. */
7186 return val > -256 && val < 256 && (val & 3) == 0;
7188 else
7189 return 0;
7192 if (code == MULT)
7194 rtx xiop0 = XEXP (index, 0);
7195 rtx xiop1 = XEXP (index, 1);
7197 return ((arm_address_register_rtx_p (xiop0, strict_p)
7198 && thumb2_index_mul_operand (xiop1))
7199 || (arm_address_register_rtx_p (xiop1, strict_p)
7200 && thumb2_index_mul_operand (xiop0)));
7202 else if (code == ASHIFT)
7204 rtx op = XEXP (index, 1);
7206 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7207 && CONST_INT_P (op)
7208 && INTVAL (op) > 0
7209 && INTVAL (op) <= 3);
7212 return (code == CONST_INT
7213 && INTVAL (index) < 4096
7214 && INTVAL (index) > -256);
7217 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7218 static int
7219 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7221 int regno;
7223 if (!REG_P (x))
7224 return 0;
7226 regno = REGNO (x);
7228 if (strict_p)
7229 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7231 return (regno <= LAST_LO_REGNUM
7232 || regno > LAST_VIRTUAL_REGISTER
7233 || regno == FRAME_POINTER_REGNUM
7234 || (GET_MODE_SIZE (mode) >= 4
7235 && (regno == STACK_POINTER_REGNUM
7236 || regno >= FIRST_PSEUDO_REGISTER
7237 || x == hard_frame_pointer_rtx
7238 || x == arg_pointer_rtx)));
7241 /* Return nonzero if x is a legitimate index register. This is the case
7242 for any base register that can access a QImode object. */
7243 inline static int
7244 thumb1_index_register_rtx_p (rtx x, int strict_p)
7246 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7249 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7251 The AP may be eliminated to either the SP or the FP, so we use the
7252 least common denominator, e.g. SImode, and offsets from 0 to 64.
7254 ??? Verify whether the above is the right approach.
7256 ??? Also, the FP may be eliminated to the SP, so perhaps that
7257 needs special handling also.
7259 ??? Look at how the mips16 port solves this problem. It probably uses
7260 better ways to solve some of these problems.
7262 Although it is not incorrect, we don't accept QImode and HImode
7263 addresses based on the frame pointer or arg pointer until the
7264 reload pass starts. This is so that eliminating such addresses
7265 into stack based ones won't produce impossible code. */
7267 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7269 /* ??? Not clear if this is right. Experiment. */
7270 if (GET_MODE_SIZE (mode) < 4
7271 && !(reload_in_progress || reload_completed)
7272 && (reg_mentioned_p (frame_pointer_rtx, x)
7273 || reg_mentioned_p (arg_pointer_rtx, x)
7274 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7275 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7276 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7277 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7278 return 0;
7280 /* Accept any base register. SP only in SImode or larger. */
7281 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7282 return 1;
7284 /* This is PC relative data before arm_reorg runs. */
7285 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7286 && GET_CODE (x) == SYMBOL_REF
7287 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7288 return 1;
7290 /* This is PC relative data after arm_reorg runs. */
7291 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7292 && reload_completed
7293 && (GET_CODE (x) == LABEL_REF
7294 || (GET_CODE (x) == CONST
7295 && GET_CODE (XEXP (x, 0)) == PLUS
7296 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7297 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7298 return 1;
7300 /* Post-inc indexing only supported for SImode and larger. */
7301 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7302 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7303 return 1;
7305 else if (GET_CODE (x) == PLUS)
7307 /* REG+REG address can be any two index registers. */
7308 /* We disallow FRAME+REG addressing since we know that FRAME
7309 will be replaced with STACK, and SP relative addressing only
7310 permits SP+OFFSET. */
7311 if (GET_MODE_SIZE (mode) <= 4
7312 && XEXP (x, 0) != frame_pointer_rtx
7313 && XEXP (x, 1) != frame_pointer_rtx
7314 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7315 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7316 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7317 return 1;
7319 /* REG+const has 5-7 bit offset for non-SP registers. */
7320 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7321 || XEXP (x, 0) == arg_pointer_rtx)
7322 && CONST_INT_P (XEXP (x, 1))
7323 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7324 return 1;
7326 /* REG+const has 10-bit offset for SP, but only SImode and
7327 larger are supported. */
7328 /* ??? Should probably check for DI/DFmode overflow here
7329 just like GO_IF_LEGITIMATE_OFFSET does. */
7330 else if (REG_P (XEXP (x, 0))
7331 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7332 && GET_MODE_SIZE (mode) >= 4
7333 && CONST_INT_P (XEXP (x, 1))
7334 && INTVAL (XEXP (x, 1)) >= 0
7335 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7336 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7337 return 1;
7339 else if (REG_P (XEXP (x, 0))
7340 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7341 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7342 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7343 && REGNO (XEXP (x, 0))
7344 <= LAST_VIRTUAL_POINTER_REGISTER))
7345 && GET_MODE_SIZE (mode) >= 4
7346 && CONST_INT_P (XEXP (x, 1))
7347 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7348 return 1;
7351 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7352 && GET_MODE_SIZE (mode) == 4
7353 && GET_CODE (x) == SYMBOL_REF
7354 && CONSTANT_POOL_ADDRESS_P (x)
7355 && ! (flag_pic
7356 && symbol_mentioned_p (get_pool_constant (x))
7357 && ! pcrel_constant_p (get_pool_constant (x))))
7358 return 1;
7360 return 0;
7363 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7364 instruction of mode MODE. */
7366 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7368 switch (GET_MODE_SIZE (mode))
7370 case 1:
7371 return val >= 0 && val < 32;
7373 case 2:
7374 return val >= 0 && val < 64 && (val & 1) == 0;
7376 default:
7377 return (val >= 0
7378 && (val + GET_MODE_SIZE (mode)) <= 128
7379 && (val & 3) == 0);
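/* In other words: byte accesses accept offsets 0-31, halfword accesses
   0-62 in steps of 2, and word or larger accesses any multiple of 4 for
   which the whole access still fits within 128 bytes of the base
   (e.g. 0-124 for SImode).  */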
7383 bool
7384 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7386 if (TARGET_ARM)
7387 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7388 else if (TARGET_THUMB2)
7389 return thumb2_legitimate_address_p (mode, x, strict_p);
7390 else /* if (TARGET_THUMB1) */
7391 return thumb1_legitimate_address_p (mode, x, strict_p);
7394 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7396 Given an rtx X being reloaded into a reg required to be
7397 in class CLASS, return the class of reg to actually use.
7398 In general this is just CLASS, but for the Thumb core registers and
7399 immediate constants we prefer a LO_REGS class or a subset. */
7401 static reg_class_t
7402 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7404 if (TARGET_32BIT)
7405 return rclass;
7406 else
7408 if (rclass == GENERAL_REGS)
7409 return LO_REGS;
7410 else
7411 return rclass;
7415 /* Build the SYMBOL_REF for __tls_get_addr. */
7417 static GTY(()) rtx tls_get_addr_libfunc;
7419 static rtx
7420 get_tls_get_addr (void)
7422 if (!tls_get_addr_libfunc)
7423 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7424 return tls_get_addr_libfunc;
7428 arm_load_tp (rtx target)
7430 if (!target)
7431 target = gen_reg_rtx (SImode);
7433 if (TARGET_HARD_TP)
7435 /* Can return in any reg. */
7436 emit_insn (gen_load_tp_hard (target));
7438 else
7440 /* Always returned in r0. Immediately copy the result into a pseudo,
7441 otherwise other uses of r0 (e.g. setting up function arguments) may
7442 clobber the value. */
7444 rtx tmp;
7446 emit_insn (gen_load_tp_soft ());
7448 tmp = gen_rtx_REG (SImode, 0);
7449 emit_move_insn (target, tmp);
7451 return target;
7454 static rtx
7455 load_tls_operand (rtx x, rtx reg)
7457 rtx tmp;
7459 if (reg == NULL_RTX)
7460 reg = gen_reg_rtx (SImode);
7462 tmp = gen_rtx_CONST (SImode, x);
7464 emit_move_insn (reg, tmp);
7466 return reg;
7469 static rtx
7470 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7472 rtx insns, label, labelno, sum;
7474 gcc_assert (reloc != TLS_DESCSEQ);
7475 start_sequence ();
7477 labelno = GEN_INT (pic_labelno++);
7478 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7479 label = gen_rtx_CONST (VOIDmode, label);
7481 sum = gen_rtx_UNSPEC (Pmode,
7482 gen_rtvec (4, x, GEN_INT (reloc), label,
7483 GEN_INT (TARGET_ARM ? 8 : 4)),
7484 UNSPEC_TLS);
7485 reg = load_tls_operand (sum, reg);
7487 if (TARGET_ARM)
7488 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7489 else
7490 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7492 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7493 LCT_PURE, /* LCT_CONST? */
7494 Pmode, 1, reg, Pmode);
7496 insns = get_insns ();
7497 end_sequence ();
7499 return insns;
7502 static rtx
7503 arm_tls_descseq_addr (rtx x, rtx reg)
7505 rtx labelno = GEN_INT (pic_labelno++);
7506 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7507 rtx sum = gen_rtx_UNSPEC (Pmode,
7508 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7509 gen_rtx_CONST (VOIDmode, label),
7510 GEN_INT (!TARGET_ARM)),
7511 UNSPEC_TLS);
7512 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7514 emit_insn (gen_tlscall (x, labelno));
7515 if (!reg)
7516 reg = gen_reg_rtx (SImode);
7517 else
7518 gcc_assert (REGNO (reg) != 0);
7520 emit_move_insn (reg, reg0);
7522 return reg;
7526 legitimize_tls_address (rtx x, rtx reg)
7528 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7529 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7531 switch (model)
7533 case TLS_MODEL_GLOBAL_DYNAMIC:
7534 if (TARGET_GNU2_TLS)
7536 reg = arm_tls_descseq_addr (x, reg);
7538 tp = arm_load_tp (NULL_RTX);
7540 dest = gen_rtx_PLUS (Pmode, tp, reg);
7542 else
7544 /* Original scheme */
7545 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7546 dest = gen_reg_rtx (Pmode);
7547 emit_libcall_block (insns, dest, ret, x);
7549 return dest;
7551 case TLS_MODEL_LOCAL_DYNAMIC:
7552 if (TARGET_GNU2_TLS)
7554 reg = arm_tls_descseq_addr (x, reg);
7556 tp = arm_load_tp (NULL_RTX);
7558 dest = gen_rtx_PLUS (Pmode, tp, reg);
7560 else
7562 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7564 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7565 share the LDM result with other LD model accesses. */
7566 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7567 UNSPEC_TLS);
7568 dest = gen_reg_rtx (Pmode);
7569 emit_libcall_block (insns, dest, ret, eqv);
7571 /* Load the addend. */
7572 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7573 GEN_INT (TLS_LDO32)),
7574 UNSPEC_TLS);
7575 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7576 dest = gen_rtx_PLUS (Pmode, dest, addend);
7578 return dest;
7580 case TLS_MODEL_INITIAL_EXEC:
7581 labelno = GEN_INT (pic_labelno++);
7582 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7583 label = gen_rtx_CONST (VOIDmode, label);
7584 sum = gen_rtx_UNSPEC (Pmode,
7585 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7586 GEN_INT (TARGET_ARM ? 8 : 4)),
7587 UNSPEC_TLS);
7588 reg = load_tls_operand (sum, reg);
7590 if (TARGET_ARM)
7591 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7592 else if (TARGET_THUMB2)
7593 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7594 else
7596 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7597 emit_move_insn (reg, gen_const_mem (SImode, reg));
7600 tp = arm_load_tp (NULL_RTX);
7602 return gen_rtx_PLUS (Pmode, tp, reg);
7604 case TLS_MODEL_LOCAL_EXEC:
7605 tp = arm_load_tp (NULL_RTX);
7607 reg = gen_rtx_UNSPEC (Pmode,
7608 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7609 UNSPEC_TLS);
7610 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7612 return gen_rtx_PLUS (Pmode, tp, reg);
7614 default:
7615 abort ();
7619 /* Try machine-dependent ways of modifying an illegitimate address
7620 to be legitimate. If we find one, return the new, valid address. */
7622 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7624 if (arm_tls_referenced_p (x))
7626 rtx addend = NULL;
7628 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7630 addend = XEXP (XEXP (x, 0), 1);
7631 x = XEXP (XEXP (x, 0), 0);
7634 if (GET_CODE (x) != SYMBOL_REF)
7635 return x;
7637 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7639 x = legitimize_tls_address (x, NULL_RTX);
7641 if (addend)
7643 x = gen_rtx_PLUS (SImode, x, addend);
7644 orig_x = x;
7646 else
7647 return x;
7650 if (!TARGET_ARM)
7652 /* TODO: legitimize_address for Thumb2. */
7653 if (TARGET_THUMB2)
7654 return x;
7655 return thumb_legitimize_address (x, orig_x, mode);
7658 if (GET_CODE (x) == PLUS)
7660 rtx xop0 = XEXP (x, 0);
7661 rtx xop1 = XEXP (x, 1);
7663 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7664 xop0 = force_reg (SImode, xop0);
7666 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7667 && !symbol_mentioned_p (xop1))
7668 xop1 = force_reg (SImode, xop1);
7670 if (ARM_BASE_REGISTER_RTX_P (xop0)
7671 && CONST_INT_P (xop1))
7673 HOST_WIDE_INT n, low_n;
7674 rtx base_reg, val;
7675 n = INTVAL (xop1);
7677 /* VFP addressing modes actually allow greater offsets, but for
7678 now we just stick with the lowest common denominator. */
7679 if (mode == DImode
7680 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7682 low_n = n & 0x0f;
7683 n &= ~0x0f;
7684 if (low_n > 4)
7686 n += 16;
7687 low_n -= 16;
7690 else
7692 low_n = ((mode) == TImode ? 0
7693 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7694 n -= low_n;
7697 base_reg = gen_reg_rtx (SImode);
7698 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7699 emit_move_insn (base_reg, val);
7700 x = plus_constant (Pmode, base_reg, low_n);
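/* Worked example (illustrative comment, not in the original source):
   for xop0 + 268 in DImode, low_n = 268 & 0xf = 12 > 4, so the address is
   rewritten as (xop0 + 272) - 4; for xop0 + 0x1234 in SImode it becomes
   (xop0 + 0x1000) + 0x234, which fits the 12-bit ldr/str offset field.  */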
7702 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7703 x = gen_rtx_PLUS (SImode, xop0, xop1);
7706 /* XXX We don't allow MINUS any more -- see comment in
7707 arm_legitimate_address_outer_p (). */
7708 else if (GET_CODE (x) == MINUS)
7710 rtx xop0 = XEXP (x, 0);
7711 rtx xop1 = XEXP (x, 1);
7713 if (CONSTANT_P (xop0))
7714 xop0 = force_reg (SImode, xop0);
7716 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7717 xop1 = force_reg (SImode, xop1);
7719 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7720 x = gen_rtx_MINUS (SImode, xop0, xop1);
7723 /* Make sure to take full advantage of the pre-indexed addressing mode
7724 with absolute addresses, which often allows the base register to
7725 be factorized across multiple adjacent memory references, and might
7726 even allow the minipool to be avoided entirely. */
7727 else if (CONST_INT_P (x) && optimize > 0)
7729 unsigned int bits;
7730 HOST_WIDE_INT mask, base, index;
7731 rtx base_reg;
7733 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7734 use an 8-bit index. So let's use a 12-bit index for SImode only and
7735 hope that arm_gen_constant will enable ldrb to use more bits. */
7736 bits = (mode == SImode) ? 12 : 8;
7737 mask = (1 << bits) - 1;
7738 base = INTVAL (x) & ~mask;
7739 index = INTVAL (x) & mask;
7740 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7742 /* It'll most probably be more efficient to generate the base
7743 with more bits set and use a negative index instead. */
7744 base |= mask;
7745 index -= mask;
7747 base_reg = force_reg (SImode, GEN_INT (base));
7748 x = plus_constant (Pmode, base_reg, index);
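/* Illustrative example (comment only, not in the original source): for
   the SImode address 0x0ffff004, base = 0x0ffff000 has 16 bits set,
   which exceeds (32 - 12)/2, so we instead use base = 0x0fffffff
   (typically a single MVN) and index = -4091; 0x0fffffff - 4091 is
   0x0ffff004 again, and -4091 still fits the 12-bit index.  */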
7751 if (flag_pic)
7753 /* We need to find and carefully transform any SYMBOL and LABEL
7754 references; so go back to the original address expression. */
7755 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7757 if (new_x != orig_x)
7758 x = new_x;
7761 return x;
7765 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7766 to be legitimate. If we find one, return the new, valid address. */
7768 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7770 if (GET_CODE (x) == PLUS
7771 && CONST_INT_P (XEXP (x, 1))
7772 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7773 || INTVAL (XEXP (x, 1)) < 0))
7775 rtx xop0 = XEXP (x, 0);
7776 rtx xop1 = XEXP (x, 1);
7777 HOST_WIDE_INT offset = INTVAL (xop1);
7779 /* Try and fold the offset into a biasing of the base register and
7780 then offsetting that. Don't do this when optimizing for space
7781 since it can cause too many CSEs. */
7782 if (optimize_size && offset >= 0
7783 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7785 HOST_WIDE_INT delta;
7787 if (offset >= 256)
7788 delta = offset - (256 - GET_MODE_SIZE (mode));
7789 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7790 delta = 31 * GET_MODE_SIZE (mode);
7791 else
7792 delta = offset & (~31 * GET_MODE_SIZE (mode));
7794 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7795 NULL_RTX);
7796 x = plus_constant (Pmode, xop0, delta);
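/* Worked example (illustrative comment, not part of the original code):
   at -Os with SImode and offset 300, we have 256 <= 300 < 256 + 31 * 4,
   so delta = 300 - (256 - 4) = 48; the base is biased by 252 and the
   access then uses the legal SImode offset #48, both of which Thumb-1
   can handle cheaply.  */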
7798 else if (offset < 0 && offset > -256)
7799 /* Small negative offsets are best done with a subtract before the
7800 dereference, since forcing these into a register normally takes two
7801 instructions. */
7802 x = force_operand (x, NULL_RTX);
7803 else
7805 /* For the remaining cases, force the constant into a register. */
7806 xop1 = force_reg (SImode, xop1);
7807 x = gen_rtx_PLUS (SImode, xop0, xop1);
7810 else if (GET_CODE (x) == PLUS
7811 && s_register_operand (XEXP (x, 1), SImode)
7812 && !s_register_operand (XEXP (x, 0), SImode))
7814 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7816 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7819 if (flag_pic)
7821 /* We need to find and carefully transform any SYMBOL and LABEL
7822 references; so go back to the original address expression. */
7823 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7825 if (new_x != orig_x)
7826 x = new_x;
7829 return x;
7832 bool
7833 arm_legitimize_reload_address (rtx *p,
7834 machine_mode mode,
7835 int opnum, int type,
7836 int ind_levels ATTRIBUTE_UNUSED)
7838 /* We must recognize output that we have already generated ourselves. */
7839 if (GET_CODE (*p) == PLUS
7840 && GET_CODE (XEXP (*p, 0)) == PLUS
7841 && REG_P (XEXP (XEXP (*p, 0), 0))
7842 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7843 && CONST_INT_P (XEXP (*p, 1)))
7845 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7846 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7847 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7848 return true;
7851 if (GET_CODE (*p) == PLUS
7852 && REG_P (XEXP (*p, 0))
7853 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7854 /* If the base register is equivalent to a constant, let the generic
7855 code handle it. Otherwise we will run into problems if a future
7856 reload pass decides to rematerialize the constant. */
7857 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7858 && CONST_INT_P (XEXP (*p, 1)))
7860 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7861 HOST_WIDE_INT low, high;
7863 /* Detect coprocessor load/stores. */
7864 bool coproc_p = ((TARGET_HARD_FLOAT
7865 && TARGET_VFP
7866 && (mode == SFmode || mode == DFmode))
7867 || (TARGET_REALLY_IWMMXT
7868 && VALID_IWMMXT_REG_MODE (mode))
7869 || (TARGET_NEON
7870 && (VALID_NEON_DREG_MODE (mode)
7871 || VALID_NEON_QREG_MODE (mode))));
7873 /* For some conditions, bail out when the lower two bits are nonzero. */
7874 if ((val & 0x3) != 0
7875 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7876 && (coproc_p
7877 /* For DI, and DF under soft-float: */
7878 || ((mode == DImode || mode == DFmode)
7879 /* Without ldrd, we use stm/ldm, which does not
7880 fare well with unaligned bits. */
7881 && (! TARGET_LDRD
7882 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7883 || TARGET_THUMB2))))
7884 return false;
7886 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7887 of which the (reg+high) gets turned into a reload add insn,
7888 we try to decompose the index into high/low values that can often
7889 also lead to better reload CSE.
7890 For example:
7891 ldr r0, [r2, #4100] // Offset too large
7892 ldr r1, [r2, #4104] // Offset too large
7894 is best reloaded as:
7895 add t1, r2, #4096
7896 ldr r0, [t1, #4]
7897 add t2, r2, #4096
7898 ldr r1, [t2, #8]
7900 which post-reload CSE can simplify in most cases to eliminate the
7901 second add instruction:
7902 add t1, r2, #4096
7903 ldr r0, [t1, #4]
7904 ldr r1, [t1, #8]
7906 The idea here is that we want to split out the bits of the constant
7907 as a mask, rather than by subtracting the maximum offset that the
7908 respective type of load/store instruction can handle.
7910 A negative low part can still be useful even when the overall offset
7911 is positive; sometimes this leads to an immediate that can be
7912 constructed with fewer instructions.
7913 For example:
7914 ldr r0, [r2, #0x3FFFFC]
7916 This is best reloaded as:
7917 add t1, r2, #0x400000
7918 ldr r0, [t1, #-4]
7920 The trick for spotting this for a load insn with N bits of offset
7921 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7922 negative offset that is going to make bit N and all the bits below
7923 it become zero in the remainder part.
7925 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7926 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7927 used in most cases of ARM load/store instructions. */
7929 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7930 (((VAL) & ((1 << (N)) - 1)) \
7931 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7932 : 0)
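/* Illustrative example (not part of the original source): for the
   0x3FFFFC case quoted above, SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
   is (0x1FFC ^ 0x1000) - 0x1000 = -4, so low = -4 and high = 0x400000,
   exactly the "add t1, r2, #0x400000; ldr r0, [t1, #-4]" sequence.  */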
7934 if (coproc_p)
7936 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7938 /* NEON quad-word load/stores are made of two double-word accesses,
7939 so the valid index range is reduced by 8. Treat as 9-bit range if
7940 we go over it. */
7941 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7942 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7944 else if (GET_MODE_SIZE (mode) == 8)
7946 if (TARGET_LDRD)
7947 low = (TARGET_THUMB2
7948 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7949 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7950 else
7951 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7952 to access doublewords. The supported load/store offsets are
7953 -8, -4, and 4, which we try to produce here. */
7954 low = ((val & 0xf) ^ 0x8) - 0x8;
7956 else if (GET_MODE_SIZE (mode) < 8)
7958 /* NEON element load/stores do not have an offset. */
7959 if (TARGET_NEON_FP16 && mode == HFmode)
7960 return false;
7962 if (TARGET_THUMB2)
7964 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7965 Try the wider 12-bit range first, and re-try if the result
7966 is out of range. */
7967 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7968 if (low < -255)
7969 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7971 else
7973 if (mode == HImode || mode == HFmode)
7975 if (arm_arch4)
7976 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7977 else
7979 /* The storehi/movhi_bytes fallbacks can use only
7980 [-4094,+4094] of the full ldrb/strb index range. */
7981 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7982 if (low == 4095 || low == -4095)
7983 return false;
7986 else
7987 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7990 else
7991 return false;
7993 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7994 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7995 - (unsigned HOST_WIDE_INT) 0x80000000);
7996 /* Check for overflow or zero */
7997 if (low == 0 || high == 0 || (high + low != val))
7998 return false;
8000 /* Reload the high part into a base reg; leave the low part
8001 in the mem.
8002 Note that replacing this gen_rtx_PLUS with plus_constant is
8003 wrong in this case because we rely on the
8004 (plus (plus reg c1) c2) structure being preserved so that
8005 XEXP (*p, 0) in push_reload below uses the correct term. */
8006 *p = gen_rtx_PLUS (GET_MODE (*p),
8007 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8008 GEN_INT (high)),
8009 GEN_INT (low));
8010 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8011 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8012 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8013 return true;
8016 return false;
8020 thumb_legitimize_reload_address (rtx *x_p,
8021 machine_mode mode,
8022 int opnum, int type,
8023 int ind_levels ATTRIBUTE_UNUSED)
8025 rtx x = *x_p;
8027 if (GET_CODE (x) == PLUS
8028 && GET_MODE_SIZE (mode) < 4
8029 && REG_P (XEXP (x, 0))
8030 && XEXP (x, 0) == stack_pointer_rtx
8031 && CONST_INT_P (XEXP (x, 1))
8032 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8034 rtx orig_x = x;
8036 x = copy_rtx (x);
8037 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8038 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8039 return x;
8042 /* If both registers are hi-regs, then it's better to reload the
8043 entire expression rather than each register individually. That
8044 only requires one reload register rather than two. */
8045 if (GET_CODE (x) == PLUS
8046 && REG_P (XEXP (x, 0))
8047 && REG_P (XEXP (x, 1))
8048 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8049 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8051 rtx orig_x = x;
8053 x = copy_rtx (x);
8054 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8055 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8056 return x;
8059 return NULL;
8062 /* Return TRUE if X contains any TLS symbol references. */
8064 bool
8065 arm_tls_referenced_p (rtx x)
8067 if (! TARGET_HAVE_TLS)
8068 return false;
8070 subrtx_iterator::array_type array;
8071 FOR_EACH_SUBRTX (iter, array, x, ALL)
8073 const_rtx x = *iter;
8074 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8075 return true;
8077 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8078 TLS offsets, not real symbol references. */
8079 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8080 iter.skip_subrtxes ();
8082 return false;
8085 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8087 On the ARM, allow any integer (invalid ones are removed later by insn
8088 patterns), nice doubles and symbol_refs which refer to the function's
8089 constant pool XXX.
8091 When generating pic allow anything. */
8093 static bool
8094 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8096 /* At present, we have no support for Neon structure constants, so forbid
8097 them here. It might be possible to handle simple cases like 0 and -1
8098 in future. */
8099 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8100 return false;
8102 return flag_pic || !label_mentioned_p (x);
8105 static bool
8106 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8108 return (CONST_INT_P (x)
8109 || CONST_DOUBLE_P (x)
8110 || CONSTANT_ADDRESS_P (x)
8111 || flag_pic);
8114 static bool
8115 arm_legitimate_constant_p (machine_mode mode, rtx x)
8117 return (!arm_cannot_force_const_mem (mode, x)
8118 && (TARGET_32BIT
8119 ? arm_legitimate_constant_p_1 (mode, x)
8120 : thumb_legitimate_constant_p (mode, x)));
8123 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8125 static bool
8126 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8128 rtx base, offset;
8130 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8132 split_const (x, &base, &offset);
8133 if (GET_CODE (base) == SYMBOL_REF
8134 && !offset_within_block_p (base, INTVAL (offset)))
8135 return true;
8137 return arm_tls_referenced_p (x);
8140 #define REG_OR_SUBREG_REG(X) \
8141 (REG_P (X) \
8142 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8144 #define REG_OR_SUBREG_RTX(X) \
8145 (REG_P (X) ? (X) : SUBREG_REG (X))
8147 static inline int
8148 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8150 machine_mode mode = GET_MODE (x);
8151 int total, words;
8153 switch (code)
8155 case ASHIFT:
8156 case ASHIFTRT:
8157 case LSHIFTRT:
8158 case ROTATERT:
8159 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8161 case PLUS:
8162 case MINUS:
8163 case COMPARE:
8164 case NEG:
8165 case NOT:
8166 return COSTS_N_INSNS (1);
8168 case MULT:
8169 if (CONST_INT_P (XEXP (x, 1)))
8171 int cycles = 0;
8172 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8174 while (i)
8176 i >>= 2;
8177 cycles++;
8179 return COSTS_N_INSNS (2) + cycles;
8181 return COSTS_N_INSNS (1) + 16;
8183 case SET:
8184 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8185 the mode. */
8186 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8187 return (COSTS_N_INSNS (words)
8188 + 4 * ((MEM_P (SET_SRC (x)))
8189 + MEM_P (SET_DEST (x))));
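/* Illustrative example (comment only, not in the original source):
   storing a DImode register to memory gives words = 2 and one MEM
   operand, so the cost is COSTS_N_INSNS (2) + 4 -- two insns' worth
   plus a small penalty for the memory operand.  */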
8191 case CONST_INT:
8192 if (outer == SET)
8194 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8195 return 0;
8196 if (thumb_shiftable_const (INTVAL (x)))
8197 return COSTS_N_INSNS (2);
8198 return COSTS_N_INSNS (3);
8200 else if ((outer == PLUS || outer == COMPARE)
8201 && INTVAL (x) < 256 && INTVAL (x) > -256)
8202 return 0;
8203 else if ((outer == IOR || outer == XOR || outer == AND)
8204 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8205 return COSTS_N_INSNS (1);
8206 else if (outer == AND)
8208 int i;
8209 /* This duplicates the tests in the andsi3 expander. */
8210 for (i = 9; i <= 31; i++)
8211 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8212 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8213 return COSTS_N_INSNS (2);
8215 else if (outer == ASHIFT || outer == ASHIFTRT
8216 || outer == LSHIFTRT)
8217 return 0;
8218 return COSTS_N_INSNS (2);
8220 case CONST:
8221 case CONST_DOUBLE:
8222 case LABEL_REF:
8223 case SYMBOL_REF:
8224 return COSTS_N_INSNS (3);
8226 case UDIV:
8227 case UMOD:
8228 case DIV:
8229 case MOD:
8230 return 100;
8232 case TRUNCATE:
8233 return 99;
8235 case AND:
8236 case XOR:
8237 case IOR:
8238 /* XXX guess. */
8239 return 8;
8241 case MEM:
8242 /* XXX another guess. */
8243 /* Memory costs quite a lot for the first word, but subsequent words
8244 load at the equivalent of a single insn each. */
8245 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8246 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8247 ? 4 : 0));
8249 case IF_THEN_ELSE:
8250 /* XXX a guess. */
8251 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8252 return 14;
8253 return 2;
8255 case SIGN_EXTEND:
8256 case ZERO_EXTEND:
8257 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8258 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8260 if (mode == SImode)
8261 return total;
8263 if (arm_arch6)
8264 return total + COSTS_N_INSNS (1);
8266 /* Assume a two-shift sequence. Increase the cost slightly so
8267 we prefer actual shifts over an extend operation. */
8268 return total + 1 + COSTS_N_INSNS (2);
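/* For instance (illustrative comment, not in the original source): without
   the v6 extend instructions, a QImode sign extension is done with a left
   shift by 24 followed by an arithmetic right shift by 24 -- the two-shift
   sequence referred to above.  */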
8270 default:
8271 return 99;
8275 static inline bool
8276 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8278 machine_mode mode = GET_MODE (x);
8279 enum rtx_code subcode;
8280 rtx operand;
8281 enum rtx_code code = GET_CODE (x);
8282 *total = 0;
8284 switch (code)
8286 case MEM:
8287 /* Memory costs quite a lot for the first word, but subsequent words
8288 load at the equivalent of a single insn each. */
8289 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8290 return true;
8292 case DIV:
8293 case MOD:
8294 case UDIV:
8295 case UMOD:
8296 if (TARGET_HARD_FLOAT && mode == SFmode)
8297 *total = COSTS_N_INSNS (2);
8298 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8299 *total = COSTS_N_INSNS (4);
8300 else
8301 *total = COSTS_N_INSNS (20);
8302 return false;
8304 case ROTATE:
8305 if (REG_P (XEXP (x, 1)))
8306 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8307 else if (!CONST_INT_P (XEXP (x, 1)))
8308 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8310 /* Fall through */
8311 case ROTATERT:
8312 if (mode != SImode)
8314 *total += COSTS_N_INSNS (4);
8315 return true;
8318 /* Fall through */
8319 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8320 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8321 if (mode == DImode)
8323 *total += COSTS_N_INSNS (3);
8324 return true;
8327 *total += COSTS_N_INSNS (1);
8328 /* Increase the cost of complex shifts because they aren't any faster,
8329 and they reduce dual issue opportunities. */
8330 if (arm_tune_cortex_a9
8331 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8332 ++*total;
8334 return true;
8336 case MINUS:
8337 if (mode == DImode)
8339 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8340 if (CONST_INT_P (XEXP (x, 0))
8341 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8343 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8344 return true;
8347 if (CONST_INT_P (XEXP (x, 1))
8348 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8350 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8351 return true;
8354 return false;
8357 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8359 if (TARGET_HARD_FLOAT
8360 && (mode == SFmode
8361 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8363 *total = COSTS_N_INSNS (1);
8364 if (CONST_DOUBLE_P (XEXP (x, 0))
8365 && arm_const_double_rtx (XEXP (x, 0)))
8367 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8368 return true;
8371 if (CONST_DOUBLE_P (XEXP (x, 1))
8372 && arm_const_double_rtx (XEXP (x, 1)))
8374 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8375 return true;
8378 return false;
8380 *total = COSTS_N_INSNS (20);
8381 return false;
8384 *total = COSTS_N_INSNS (1);
8385 if (CONST_INT_P (XEXP (x, 0))
8386 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8388 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8389 return true;
8392 subcode = GET_CODE (XEXP (x, 1));
8393 if (subcode == ASHIFT || subcode == ASHIFTRT
8394 || subcode == LSHIFTRT
8395 || subcode == ROTATE || subcode == ROTATERT)
8397 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8398 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8399 return true;
8402 /* A shift as a part of RSB costs no more than RSB itself. */
8403 if (GET_CODE (XEXP (x, 0)) == MULT
8404 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8406 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8407 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8408 return true;
8411 if (subcode == MULT
8412 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8414 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8415 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8416 return true;
8419 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8420 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8422 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8423 if (REG_P (XEXP (XEXP (x, 1), 0))
8424 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8425 *total += COSTS_N_INSNS (1);
8427 return true;
8430 /* Fall through */
8432 case PLUS:
8433 if (code == PLUS && arm_arch6 && mode == SImode
8434 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8435 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8437 *total = COSTS_N_INSNS (1);
8438 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8439 0, speed);
8440 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8441 return true;
8444 /* MLA: All arguments must be registers. We filter out
8445 multiplication by a power of two, so that we fall through to
8446 the code below. */
8447 if (GET_CODE (XEXP (x, 0)) == MULT
8448 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8450 /* The cost comes from the cost of the multiply. */
8451 return false;
8454 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8456 if (TARGET_HARD_FLOAT
8457 && (mode == SFmode
8458 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8460 *total = COSTS_N_INSNS (1);
8461 if (CONST_DOUBLE_P (XEXP (x, 1))
8462 && arm_const_double_rtx (XEXP (x, 1)))
8464 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8465 return true;
8468 return false;
8471 *total = COSTS_N_INSNS (20);
8472 return false;
8475 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8476 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8478 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8479 if (REG_P (XEXP (XEXP (x, 0), 0))
8480 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8481 *total += COSTS_N_INSNS (1);
8482 return true;
8485 /* Fall through */
8487 case AND: case XOR: case IOR:
8489 /* Normally the frame registers will be split into reg+const during
8490 reload, so it is a bad idea to combine them with other instructions,
8491 since then they might not be moved outside of loops. As a compromise
8492 we allow integration with ops that have a constant as their second
8493 operand. */
8494 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8495 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8496 && !CONST_INT_P (XEXP (x, 1)))
8497 *total = COSTS_N_INSNS (1);
8499 if (mode == DImode)
8501 *total += COSTS_N_INSNS (2);
8502 if (CONST_INT_P (XEXP (x, 1))
8503 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8505 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8506 return true;
8509 return false;
8512 *total += COSTS_N_INSNS (1);
8513 if (CONST_INT_P (XEXP (x, 1))
8514 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8516 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8517 return true;
8519 subcode = GET_CODE (XEXP (x, 0));
8520 if (subcode == ASHIFT || subcode == ASHIFTRT
8521 || subcode == LSHIFTRT
8522 || subcode == ROTATE || subcode == ROTATERT)
8524 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8525 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8526 return true;
8529 if (subcode == MULT
8530 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8532 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8533 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8534 return true;
8537 if (subcode == UMIN || subcode == UMAX
8538 || subcode == SMIN || subcode == SMAX)
8540 *total = COSTS_N_INSNS (3);
8541 return true;
8544 return false;
8546 case MULT:
8547 /* This should have been handled by the CPU specific routines. */
8548 gcc_unreachable ();
8550 case TRUNCATE:
8551 if (arm_arch3m && mode == SImode
8552 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8553 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8554 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8555 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8556 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8557 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8559 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8560 return true;
8562 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8563 return false;
8565 case NEG:
8566 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8568 if (TARGET_HARD_FLOAT
8569 && (mode == SFmode
8570 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8572 *total = COSTS_N_INSNS (1);
8573 return false;
8575 *total = COSTS_N_INSNS (2);
8576 return false;
8579 /* Fall through */
8580 case NOT:
8581 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8582 if (mode == SImode && code == NOT)
8584 subcode = GET_CODE (XEXP (x, 0));
8585 if (subcode == ASHIFT || subcode == ASHIFTRT
8586 || subcode == LSHIFTRT
8587 || subcode == ROTATE || subcode == ROTATERT
8588 || (subcode == MULT
8589 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8591 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8592 /* Register shifts cost an extra cycle. */
8593 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8594 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8595 subcode, 1, speed);
8596 return true;
8600 return false;
8602 case IF_THEN_ELSE:
8603 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8605 *total = COSTS_N_INSNS (4);
8606 return true;
8609 operand = XEXP (x, 0);
8611 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8612 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8613 && REG_P (XEXP (operand, 0))
8614 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8615 *total += COSTS_N_INSNS (1);
8616 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8617 + rtx_cost (XEXP (x, 2), code, 2, speed));
8618 return true;
8620 case NE:
8621 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8623 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8624 return true;
8626 goto scc_insn;
8628 case GE:
8629 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8630 && mode == SImode && XEXP (x, 1) == const0_rtx)
8632 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8633 return true;
8635 goto scc_insn;
8637 case LT:
8638 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8639 && mode == SImode && XEXP (x, 1) == const0_rtx)
8641 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8642 return true;
8644 goto scc_insn;
8646 case EQ:
8647 case GT:
8648 case LE:
8649 case GEU:
8650 case LTU:
8651 case GTU:
8652 case LEU:
8653 case UNORDERED:
8654 case ORDERED:
8655 case UNEQ:
8656 case UNGE:
8657 case UNLT:
8658 case UNGT:
8659 case UNLE:
8660 scc_insn:
8661 /* SCC insns. In the case where the comparison has already been
8662 performed, then they cost 2 instructions. Otherwise they need
8663 an additional comparison before them. */
8664 *total = COSTS_N_INSNS (2);
8665 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8667 return true;
8670 /* Fall through */
8671 case COMPARE:
8672 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8674 *total = 0;
8675 return true;
8678 *total += COSTS_N_INSNS (1);
8679 if (CONST_INT_P (XEXP (x, 1))
8680 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8682 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8683 return true;
8686 subcode = GET_CODE (XEXP (x, 0));
8687 if (subcode == ASHIFT || subcode == ASHIFTRT
8688 || subcode == LSHIFTRT
8689 || subcode == ROTATE || subcode == ROTATERT)
8691 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8692 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8693 return true;
8696 if (subcode == MULT
8697 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8699 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8700 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8701 return true;
8704 return false;
8706 case UMIN:
8707 case UMAX:
8708 case SMIN:
8709 case SMAX:
8710 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8711 if (!CONST_INT_P (XEXP (x, 1))
8712 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8713 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8714 return true;
8716 case ABS:
8717 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8719 if (TARGET_HARD_FLOAT
8720 && (mode == SFmode
8721 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8723 *total = COSTS_N_INSNS (1);
8724 return false;
8726 *total = COSTS_N_INSNS (20);
8727 return false;
8729 *total = COSTS_N_INSNS (1);
8730 if (mode == DImode)
8731 *total += COSTS_N_INSNS (3);
8732 return false;
8734 case SIGN_EXTEND:
8735 case ZERO_EXTEND:
8736 *total = 0;
8737 if (GET_MODE_CLASS (mode) == MODE_INT)
8739 rtx op = XEXP (x, 0);
8740 machine_mode opmode = GET_MODE (op);
8742 if (mode == DImode)
8743 *total += COSTS_N_INSNS (1);
8745 if (opmode != SImode)
8747 if (MEM_P (op))
8749 /* If !arm_arch4, we use one of the extendhisi2_mem
8750 or movhi_bytes patterns for HImode. For a QImode
8751 sign extension, we first zero-extend from memory
8752 and then perform a shift sequence. */
8753 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8754 *total += COSTS_N_INSNS (2);
8756 else if (arm_arch6)
8757 *total += COSTS_N_INSNS (1);
8759 /* We don't have the necessary insn, so we need to perform some
8760 other operation. */
8761 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8762 /* An and with constant 255. */
8763 *total += COSTS_N_INSNS (1);
8764 else
8765 /* A shift sequence. Increase costs slightly to avoid
8766 combining two shifts into an extend operation. */
8767 *total += COSTS_N_INSNS (2) + 1;
8770 return false;
8773 switch (GET_MODE (XEXP (x, 0)))
8775 case V8QImode:
8776 case V4HImode:
8777 case V2SImode:
8778 case V4QImode:
8779 case V2HImode:
8780 *total = COSTS_N_INSNS (1);
8781 return false;
8783 default:
8784 gcc_unreachable ();
8786 gcc_unreachable ();
8788 case ZERO_EXTRACT:
8789 case SIGN_EXTRACT:
8790 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8791 return true;
8793 case CONST_INT:
8794 if (const_ok_for_arm (INTVAL (x))
8795 || const_ok_for_arm (~INTVAL (x)))
8796 *total = COSTS_N_INSNS (1);
8797 else
8798 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8799 INTVAL (x), NULL_RTX,
8800 NULL_RTX, 0, 0));
8801 return true;
8803 case CONST:
8804 case LABEL_REF:
8805 case SYMBOL_REF:
8806 *total = COSTS_N_INSNS (3);
8807 return true;
8809 case HIGH:
8810 *total = COSTS_N_INSNS (1);
8811 return true;
8813 case LO_SUM:
8814 *total = COSTS_N_INSNS (1);
8815 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8816 return true;
8818 case CONST_DOUBLE:
8819 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8820 && (mode == SFmode || !TARGET_VFP_SINGLE))
8821 *total = COSTS_N_INSNS (1);
8822 else
8823 *total = COSTS_N_INSNS (4);
8824 return true;
8826 case SET:
8827 /* The vec_extract patterns accept memory operands that require an
8828 address reload. Account for the cost of that reload to give the
8829 auto-inc-dec pass an incentive to try to replace them. */
8830 if (TARGET_NEON && MEM_P (SET_DEST (x))
8831 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8833 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8834 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8835 *total += COSTS_N_INSNS (1);
8836 return true;
8838 /* Likewise for the vec_set patterns. */
8839 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8840 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8841 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8843 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8844 *total = rtx_cost (mem, code, 0, speed);
8845 if (!neon_vector_mem_operand (mem, 2, true))
8846 *total += COSTS_N_INSNS (1);
8847 return true;
8849 return false;
8851 case UNSPEC:
8852 /* We cost this as high as our memory costs to allow this to
8853 be hoisted from loops. */
8854 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8856 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8858 return true;
8860 case CONST_VECTOR:
8861 if (TARGET_NEON
8862 && TARGET_HARD_FLOAT
8863 && outer == SET
8864 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8865 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8866 *total = COSTS_N_INSNS (1);
8867 else
8868 *total = COSTS_N_INSNS (4);
8869 return true;
8871 default:
8872 *total = COSTS_N_INSNS (4);
8873 return false;
8877 /* Estimates the size cost of thumb1 instructions.
8878 For now most of the code is copied from thumb1_rtx_costs. We need more
8879 fine-grained tuning when we have more related test cases. */
8880 static inline int
8881 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8883 machine_mode mode = GET_MODE (x);
8884 int words;
8886 switch (code)
8888 case ASHIFT:
8889 case ASHIFTRT:
8890 case LSHIFTRT:
8891 case ROTATERT:
8892 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8894 case PLUS:
8895 case MINUS:
8896 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8897 patterns defined by RTL expansion, especially for the expansion of
8898 multiplication. */
8899 if ((GET_CODE (XEXP (x, 0)) == MULT
8900 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8901 || (GET_CODE (XEXP (x, 1)) == MULT
8902 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8903 return COSTS_N_INSNS (2);
8904 /* Deliberately fall through for normal RTXs. */
8905 case COMPARE:
8906 case NEG:
8907 case NOT:
8908 return COSTS_N_INSNS (1);
8910 case MULT:
8911 if (CONST_INT_P (XEXP (x, 1)))
8913 /* The Thumb-1 mul instruction can't operate on a constant. We must load it
8914 into a register first. */
8915 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8916 /* For the targets which have a very small and high-latency multiply
8917 unit, we prefer to synthesize the mult with up to 5 instructions,
8918 giving a good balance between size and performance. */
8919 if (arm_arch6m && arm_m_profile_small_mul)
8920 return COSTS_N_INSNS (5);
8921 else
8922 return COSTS_N_INSNS (1) + const_size;
8924 return COSTS_N_INSNS (1);
8926 case SET:
8927 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8928 the mode. */
8929 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8930 return COSTS_N_INSNS (words)
8931 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8932 || satisfies_constraint_K (SET_SRC (x))
8933 /* thumb1_movdi_insn. */
8934 || ((words > 1) && MEM_P (SET_SRC (x))));
8936 case CONST_INT:
8937 if (outer == SET)
8939 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8940 return COSTS_N_INSNS (1);
8941 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8942 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8943 return COSTS_N_INSNS (2);
8944 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8945 if (thumb_shiftable_const (INTVAL (x)))
8946 return COSTS_N_INSNS (2);
8947 return COSTS_N_INSNS (3);
8949 else if ((outer == PLUS || outer == COMPARE)
8950 && INTVAL (x) < 256 && INTVAL (x) > -256)
8951 return 0;
8952 else if ((outer == IOR || outer == XOR || outer == AND)
8953 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8954 return COSTS_N_INSNS (1);
8955 else if (outer == AND)
8957 int i;
8958 /* This duplicates the tests in the andsi3 expander. */
8959 for (i = 9; i <= 31; i++)
8960 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8961 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8962 return COSTS_N_INSNS (2);
8964 else if (outer == ASHIFT || outer == ASHIFTRT
8965 || outer == LSHIFTRT)
8966 return 0;
8967 return COSTS_N_INSNS (2);
8969 case CONST:
8970 case CONST_DOUBLE:
8971 case LABEL_REF:
8972 case SYMBOL_REF:
8973 return COSTS_N_INSNS (3);
8975 case UDIV:
8976 case UMOD:
8977 case DIV:
8978 case MOD:
8979 return 100;
8981 case TRUNCATE:
8982 return 99;
8984 case AND:
8985 case XOR:
8986 case IOR:
8987 return COSTS_N_INSNS (1);
8989 case MEM:
8990 return (COSTS_N_INSNS (1)
8991 + COSTS_N_INSNS (1)
8992 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8993 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8994 ? COSTS_N_INSNS (1) : 0));
8996 case IF_THEN_ELSE:
8997 /* XXX a guess. */
8998 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8999 return 14;
9000 return 2;
9002 case ZERO_EXTEND:
9003 /* XXX still guessing. */
9004 switch (GET_MODE (XEXP (x, 0)))
9006 case QImode:
9007 return (1 + (mode == DImode ? 4 : 0)
9008 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9010 case HImode:
9011 return (4 + (mode == DImode ? 4 : 0)
9012 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9014 case SImode:
9015 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9017 default:
9018 return 99;
9021 default:
9022 return 99;
9026 /* RTX costs when optimizing for size. */
9027 static bool
9028 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9029 int *total)
9031 machine_mode mode = GET_MODE (x);
9032 if (TARGET_THUMB1)
9034 *total = thumb1_size_rtx_costs (x, code, outer_code);
9035 return true;
9038 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9039 switch (code)
9041 case MEM:
9042 /* A memory access costs 1 insn if the mode is small, or the address is
9043 a single register, otherwise it costs one insn per word. */
9044 if (REG_P (XEXP (x, 0)))
9045 *total = COSTS_N_INSNS (1);
9046 else if (flag_pic
9047 && GET_CODE (XEXP (x, 0)) == PLUS
9048 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9049 /* This will be split into two instructions.
9050 See arm.md:calculate_pic_address. */
9051 *total = COSTS_N_INSNS (2);
9052 else
9053 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9054 return true;
9056 case DIV:
9057 case MOD:
9058 case UDIV:
9059 case UMOD:
9060 /* Needs a libcall, so it costs about this. */
9061 *total = COSTS_N_INSNS (2);
9062 return false;
9064 case ROTATE:
9065 if (mode == SImode && REG_P (XEXP (x, 1)))
9067 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9068 return true;
9070 /* Fall through */
9071 case ROTATERT:
9072 case ASHIFT:
9073 case LSHIFTRT:
9074 case ASHIFTRT:
9075 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9077 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9078 return true;
9080 else if (mode == SImode)
9082 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9083 /* Slightly disparage register shifts, but not by much. */
9084 if (!CONST_INT_P (XEXP (x, 1)))
9085 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9086 return true;
9089 /* Needs a libcall. */
9090 *total = COSTS_N_INSNS (2);
9091 return false;
9093 case MINUS:
9094 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9095 && (mode == SFmode || !TARGET_VFP_SINGLE))
9097 *total = COSTS_N_INSNS (1);
9098 return false;
9101 if (mode == SImode)
9103 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9104 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9106 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9107 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9108 || subcode1 == ROTATE || subcode1 == ROTATERT
9109 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9110 || subcode1 == ASHIFTRT)
9112 /* It's just the cost of the two operands. */
9113 *total = 0;
9114 return false;
9117 *total = COSTS_N_INSNS (1);
9118 return false;
9121 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9122 return false;
9124 case PLUS:
9125 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9126 && (mode == SFmode || !TARGET_VFP_SINGLE))
9128 *total = COSTS_N_INSNS (1);
9129 return false;
9132 /* A shift as a part of ADD costs nothing. */
9133 if (GET_CODE (XEXP (x, 0)) == MULT
9134 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9136 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9137 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9138 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9139 return true;
9142 /* Fall through */
9143 case AND: case XOR: case IOR:
9144 if (mode == SImode)
9146 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9148 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9149 || subcode == LSHIFTRT || subcode == ASHIFTRT
9150 || (code == AND && subcode == NOT))
9152 /* It's just the cost of the two operands. */
9153 *total = 0;
9154 return false;
9158 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9159 return false;
9161 case MULT:
9162 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9163 return false;
9165 case NEG:
9166 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9167 && (mode == SFmode || !TARGET_VFP_SINGLE))
9169 *total = COSTS_N_INSNS (1);
9170 return false;
9173 /* Fall through */
9174 case NOT:
9175 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9177 return false;
9179 case IF_THEN_ELSE:
9180 *total = 0;
9181 return false;
9183 case COMPARE:
9184 if (cc_register (XEXP (x, 0), VOIDmode))
9185 *total = 0;
9186 else
9187 *total = COSTS_N_INSNS (1);
9188 return false;
9190 case ABS:
9191 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9192 && (mode == SFmode || !TARGET_VFP_SINGLE))
9193 *total = COSTS_N_INSNS (1);
9194 else
9195 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9196 return false;
9198 case SIGN_EXTEND:
9199 case ZERO_EXTEND:
9200 return arm_rtx_costs_1 (x, outer_code, total, 0);
9202 case CONST_INT:
9203 if (const_ok_for_arm (INTVAL (x)))
9204 /* A multiplication by a constant requires another instruction
9205 to load the constant to a register. */
9206 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9207 ? 1 : 0);
9208 else if (const_ok_for_arm (~INTVAL (x)))
9209 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9210 else if (const_ok_for_arm (-INTVAL (x)))
9212 if (outer_code == COMPARE || outer_code == PLUS
9213 || outer_code == MINUS)
9214 *total = 0;
9215 else
9216 *total = COSTS_N_INSNS (1);
9218 else
9219 *total = COSTS_N_INSNS (2);
9220 return true;
9222 case CONST:
9223 case LABEL_REF:
9224 case SYMBOL_REF:
9225 *total = COSTS_N_INSNS (2);
9226 return true;
9228 case CONST_DOUBLE:
9229 *total = COSTS_N_INSNS (4);
9230 return true;
9232 case CONST_VECTOR:
9233 if (TARGET_NEON
9234 && TARGET_HARD_FLOAT
9235 && outer_code == SET
9236 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9237 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9238 *total = COSTS_N_INSNS (1);
9239 else
9240 *total = COSTS_N_INSNS (4);
9241 return true;
9243 case HIGH:
9244 case LO_SUM:
9245 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9246 cost of these slightly. */
9247 *total = COSTS_N_INSNS (1) + 1;
9248 return true;
9250 case SET:
9251 return false;
9253 default:
9254 if (mode != VOIDmode)
9255 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9256 else
9257 *total = COSTS_N_INSNS (4); /* Who knows? */
9258 return false;
9262 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9263 operand, then return the operand that is being shifted. If the shift
9264 is not by a constant, then set SHIFT_REG to point to the operand.
9265 Return NULL if OP is not a shifter operand. */
9266 static rtx
9267 shifter_op_p (rtx op, rtx *shift_reg)
9269 enum rtx_code code = GET_CODE (op);
9271 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9272 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9273 return XEXP (op, 0);
9274 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9275 return XEXP (op, 0);
9276 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9277 || code == ASHIFTRT)
9279 if (!CONST_INT_P (XEXP (op, 1)))
9280 *shift_reg = XEXP (op, 1);
9281 return XEXP (op, 0);
9284 return NULL;
9287 static bool
9288 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9290 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9291 gcc_assert (GET_CODE (x) == UNSPEC);
9293 switch (XINT (x, 1))
9295 case UNSPEC_UNALIGNED_LOAD:
9296 /* We can only do unaligned loads into the integer unit, and we can't
9297 use LDM or LDRD. */
9298 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9299 if (speed_p)
9300 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9301 + extra_cost->ldst.load_unaligned);
9303 #ifdef NOT_YET
9304 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9305 ADDR_SPACE_GENERIC, speed_p);
9306 #endif
9307 return true;
9309 case UNSPEC_UNALIGNED_STORE:
9310 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9311 if (speed_p)
9312 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9313 + extra_cost->ldst.store_unaligned);
9315 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9316 #ifdef NOT_YET
9317 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9318 ADDR_SPACE_GENERIC, speed_p);
9319 #endif
9320 return true;
9322 case UNSPEC_VRINTZ:
9323 case UNSPEC_VRINTP:
9324 case UNSPEC_VRINTM:
9325 case UNSPEC_VRINTR:
9326 case UNSPEC_VRINTX:
9327 case UNSPEC_VRINTA:
9328 *cost = COSTS_N_INSNS (1);
9329 if (speed_p)
9330 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9332 return true;
9333 default:
9334 *cost = COSTS_N_INSNS (2);
9335 break;
9337 return false;
9340 /* Cost of a libcall. We assume one insn per argument, an amount for the
9341 call (one insn for -Os) and then one for processing the result. */
9342 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
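/* For example (illustrative comment only): LIBCALL_COST (2) expands to
   COSTS_N_INSNS (2 + 18) when optimizing for speed and COSTS_N_INSNS (2 + 2)
   when optimizing for size.  */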
9344 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9345 do \
9347 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9348 if (shift_op != NULL \
9349 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9351 if (shift_reg) \
9353 if (speed_p) \
9354 *cost += extra_cost->alu.arith_shift_reg; \
9355 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9357 else if (speed_p) \
9358 *cost += extra_cost->alu.arith_shift; \
9360 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9361 + rtx_cost (XEXP (x, 1 - IDX), \
9362 OP, 1, speed_p)); \
9363 return true; \
9366 while (0);
9368 /* RTX costs. Make an estimate of the cost of executing the operation
9369 X, which is contained with an operation with code OUTER_CODE.
9370 SPEED_P indicates whether the cost desired is the performance cost,
9371 or the size cost. The estimate is stored in COST and the return
9372 value is TRUE if the cost calculation is final, or FALSE if the
9373 caller should recurse through the operands of X to add additional
9374 costs.
9376 We currently make no attempt to model the size savings of Thumb-2
9377 16-bit instructions. At the normal points in compilation where
9378 this code is called we have no measure of whether the condition
9379 flags are live or not, and thus no realistic way to determine what
9380 the size will eventually be. */
9381 static bool
9382 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9383 const struct cpu_cost_table *extra_cost,
9384 int *cost, bool speed_p)
9386 machine_mode mode = GET_MODE (x);
9388 if (TARGET_THUMB1)
9390 if (speed_p)
9391 *cost = thumb1_rtx_costs (x, code, outer_code);
9392 else
9393 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9394 return true;
9397 switch (code)
9399 case SET:
9400 *cost = 0;
9401 /* SET RTXs don't have a mode so we get it from the destination. */
9402 mode = GET_MODE (SET_DEST (x));
9404 if (REG_P (SET_SRC (x))
9405 && REG_P (SET_DEST (x)))
9407 /* Assume that most copies can be done with a single insn,
9408 unless we don't have HW FP, in which case everything
9409 larger than word mode will require two insns. */
9410 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9411 && GET_MODE_SIZE (mode) > 4)
9412 || mode == DImode)
9413 ? 2 : 1);
9414 /* Conditional register moves can be encoded
9415 in 16 bits in Thumb mode. */
9416 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9417 *cost >>= 1;
9419 return true;
9422 if (CONST_INT_P (SET_SRC (x)))
9424 /* Handle CONST_INT here, since the value doesn't have a mode
9425 and we would otherwise be unable to work out the true cost. */
9426 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9427 outer_code = SET;
9428 /* Slightly lower the cost of setting a core reg to a constant.
9429 This helps break up chains and allows for better scheduling. */
9430 if (REG_P (SET_DEST (x))
9431 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9432 *cost -= 1;
9433 x = SET_SRC (x);
9434 /* Immediate moves with an immediate in the range [0, 255] can be
9435 encoded in 16 bits in Thumb mode. */
9436 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9437 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9438 *cost >>= 1;
9439 goto const_int_cost;
9442 return false;
9444 case MEM:
9445 /* A memory access costs 1 insn if the mode is small, or the address is
9446 a single register, otherwise it costs one insn per word. */
9447 if (REG_P (XEXP (x, 0)))
9448 *cost = COSTS_N_INSNS (1);
9449 else if (flag_pic
9450 && GET_CODE (XEXP (x, 0)) == PLUS
9451 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9452 /* This will be split into two instructions.
9453 See arm.md:calculate_pic_address. */
9454 *cost = COSTS_N_INSNS (2);
9455 else
9456 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9458 /* For speed optimizations, add the costs of the address and
9459 accessing memory. */
9460 if (speed_p)
9461 #ifdef NOT_YET
9462 *cost += (extra_cost->ldst.load
9463 + arm_address_cost (XEXP (x, 0), mode,
9464 ADDR_SPACE_GENERIC, speed_p));
9465 #else
9466 *cost += extra_cost->ldst.load;
9467 #endif
9468 return true;
9470 case PARALLEL:
9472 /* Calculations of LDM costs are complex. We assume an initial cost
9473 (ldm_1st) which will load the number of registers mentioned in
9474 ldm_regs_per_insn_1st registers; then each additional
9475 ldm_regs_per_insn_subsequent registers cost one more insn. The
9476 formula for N regs is thus:
9478 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9479 + ldm_regs_per_insn_subsequent - 1)
9480 / ldm_regs_per_insn_subsequent).
9482 Additional costs may also be added for addressing. A similar
9483 formula is used for STM. */
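/* Worked example (purely illustrative tuning numbers): with
   ldm_regs_per_insn_1st == 3 and ldm_regs_per_insn_subsequent == 2, an
   LDM of N == 7 registers is costed as

     ldm_1st + COSTS_N_INSNS ((MAX (7 - 3, 0) + 2 - 1) / 2)
       == ldm_1st + COSTS_N_INSNS (2). */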
9485 bool is_ldm = load_multiple_operation (x, SImode);
9486 bool is_stm = store_multiple_operation (x, SImode);
9488 *cost = COSTS_N_INSNS (1);
9490 if (is_ldm || is_stm)
9492 if (speed_p)
9494 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9495 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9496 ? extra_cost->ldst.ldm_regs_per_insn_1st
9497 : extra_cost->ldst.stm_regs_per_insn_1st;
9498 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9499 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9500 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9502 *cost += regs_per_insn_1st
9503 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9504 + regs_per_insn_sub - 1)
9505 / regs_per_insn_sub);
9506 return true;
9510 return false;
9512 case DIV:
9513 case UDIV:
9514 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9515 && (mode == SFmode || !TARGET_VFP_SINGLE))
9516 *cost = COSTS_N_INSNS (speed_p
9517 ? extra_cost->fp[mode != SFmode].div : 1);
9518 else if (mode == SImode && TARGET_IDIV)
9519 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9520 else
9521 *cost = LIBCALL_COST (2);
9522 return false; /* All arguments must be in registers. */
9524 case MOD:
9525 case UMOD:
9526 *cost = LIBCALL_COST (2);
9527 return false; /* All arguments must be in registers. */
9529 case ROTATE:
9530 if (mode == SImode && REG_P (XEXP (x, 1)))
9532 *cost = (COSTS_N_INSNS (2)
9533 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9534 if (speed_p)
9535 *cost += extra_cost->alu.shift_reg;
9536 return true;
9538 /* Fall through */
9539 case ROTATERT:
9540 case ASHIFT:
9541 case LSHIFTRT:
9542 case ASHIFTRT:
9543 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9545 *cost = (COSTS_N_INSNS (3)
9546 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9547 if (speed_p)
9548 *cost += 2 * extra_cost->alu.shift;
9549 return true;
9551 else if (mode == SImode)
9553 *cost = (COSTS_N_INSNS (1)
9554 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9555 /* Slightly disparage register shifts at -Os, but not by much. */
9556 if (!CONST_INT_P (XEXP (x, 1)))
9557 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9558 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9559 return true;
9561 else if (GET_MODE_CLASS (mode) == MODE_INT
9562 && GET_MODE_SIZE (mode) < 4)
9564 if (code == ASHIFT)
9566 *cost = (COSTS_N_INSNS (1)
9567 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9568 /* Slightly disparage register shifts at -Os, but not by
9569 much. */
9570 if (!CONST_INT_P (XEXP (x, 1)))
9571 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9572 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9574 else if (code == LSHIFTRT || code == ASHIFTRT)
9576 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9578 /* Can use SBFX/UBFX. */
9579 *cost = COSTS_N_INSNS (1);
9580 if (speed_p)
9581 *cost += extra_cost->alu.bfx;
9582 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9584 else
9586 *cost = COSTS_N_INSNS (2);
9587 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9588 if (speed_p)
9590 if (CONST_INT_P (XEXP (x, 1)))
9591 *cost += 2 * extra_cost->alu.shift;
9592 else
9593 *cost += (extra_cost->alu.shift
9594 + extra_cost->alu.shift_reg);
9596 else
9597 /* Slightly disparage register shifts. */
9598 *cost += !CONST_INT_P (XEXP (x, 1));
9601 else /* Rotates. */
9603 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9604 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9605 if (speed_p)
9607 if (CONST_INT_P (XEXP (x, 1)))
9608 *cost += (2 * extra_cost->alu.shift
9609 + extra_cost->alu.log_shift);
9610 else
9611 *cost += (extra_cost->alu.shift
9612 + extra_cost->alu.shift_reg
9613 + extra_cost->alu.log_shift_reg);
9616 return true;
9619 *cost = LIBCALL_COST (2);
9620 return false;
9622 case BSWAP:
9623 if (arm_arch6)
9625 if (mode == SImode)
9627 *cost = COSTS_N_INSNS (1);
9628 if (speed_p)
9629 *cost += extra_cost->alu.rev;
9631 return false;
9634 else
9636 /* No rev instruction available. Look at arm_legacy_rev
9637 and thumb_legacy_rev for the form of RTL used then. */
9638 if (TARGET_THUMB)
9640 *cost = COSTS_N_INSNS (10);
9642 if (speed_p)
9644 *cost += 6 * extra_cost->alu.shift;
9645 *cost += 3 * extra_cost->alu.logical;
9648 else
9650 *cost = COSTS_N_INSNS (5);
9652 if (speed_p)
9654 *cost += 2 * extra_cost->alu.shift;
9655 *cost += extra_cost->alu.arith_shift;
9656 *cost += 2 * extra_cost->alu.logical;
9659 return true;
9661 return false;
9663 case MINUS:
9664 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9665 && (mode == SFmode || !TARGET_VFP_SINGLE))
9667 *cost = COSTS_N_INSNS (1);
9668 if (GET_CODE (XEXP (x, 0)) == MULT
9669 || GET_CODE (XEXP (x, 1)) == MULT)
9671 rtx mul_op0, mul_op1, sub_op;
9673 if (speed_p)
9674 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9676 if (GET_CODE (XEXP (x, 0)) == MULT)
9678 mul_op0 = XEXP (XEXP (x, 0), 0);
9679 mul_op1 = XEXP (XEXP (x, 0), 1);
9680 sub_op = XEXP (x, 1);
9682 else
9684 mul_op0 = XEXP (XEXP (x, 1), 0);
9685 mul_op1 = XEXP (XEXP (x, 1), 1);
9686 sub_op = XEXP (x, 0);
9689 /* The first operand of the multiply may be optionally
9690 negated. */
9691 if (GET_CODE (mul_op0) == NEG)
9692 mul_op0 = XEXP (mul_op0, 0);
9694 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9695 + rtx_cost (mul_op1, code, 0, speed_p)
9696 + rtx_cost (sub_op, code, 0, speed_p));
9698 return true;
9701 if (speed_p)
9702 *cost += extra_cost->fp[mode != SFmode].addsub;
9703 return false;
9706 if (mode == SImode)
9708 rtx shift_by_reg = NULL;
9709 rtx shift_op;
9710 rtx non_shift_op;
9712 *cost = COSTS_N_INSNS (1);
9714 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9715 if (shift_op == NULL)
9717 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9718 non_shift_op = XEXP (x, 0);
9720 else
9721 non_shift_op = XEXP (x, 1);
9723 if (shift_op != NULL)
9725 if (shift_by_reg != NULL)
9727 if (speed_p)
9728 *cost += extra_cost->alu.arith_shift_reg;
9729 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9731 else if (speed_p)
9732 *cost += extra_cost->alu.arith_shift;
9734 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9735 + rtx_cost (non_shift_op, code, 0, speed_p));
9736 return true;
9739 if (arm_arch_thumb2
9740 && GET_CODE (XEXP (x, 1)) == MULT)
9742 /* MLS. */
9743 if (speed_p)
9744 *cost += extra_cost->mult[0].add;
9745 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9746 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9747 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9748 return true;
9751 if (CONST_INT_P (XEXP (x, 0)))
9753 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9754 INTVAL (XEXP (x, 0)), NULL_RTX,
9755 NULL_RTX, 1, 0);
9756 *cost = COSTS_N_INSNS (insns);
9757 if (speed_p)
9758 *cost += insns * extra_cost->alu.arith;
9759 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9760 return true;
9762 else if (speed_p)
9763 *cost += extra_cost->alu.arith;
9765 return false;
9768 if (GET_MODE_CLASS (mode) == MODE_INT
9769 && GET_MODE_SIZE (mode) < 4)
9771 rtx shift_op, shift_reg;
9772 shift_reg = NULL;
9774 /* We check both sides of the MINUS for shifter operands since,
9775 unlike PLUS, it's not commutative. */
9777 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9778 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9780 /* Slightly disparage, as we might need to widen the result. */
9781 *cost = 1 + COSTS_N_INSNS (1);
9782 if (speed_p)
9783 *cost += extra_cost->alu.arith;
9785 if (CONST_INT_P (XEXP (x, 0)))
9787 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9788 return true;
9791 return false;
9794 if (mode == DImode)
9796 *cost = COSTS_N_INSNS (2);
9798 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9800 rtx op1 = XEXP (x, 1);
9802 if (speed_p)
9803 *cost += 2 * extra_cost->alu.arith;
9805 if (GET_CODE (op1) == ZERO_EXTEND)
9806 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9807 else
9808 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9809 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9810 0, speed_p);
9811 return true;
9813 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9815 if (speed_p)
9816 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9817 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9818 0, speed_p)
9819 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9820 return true;
9822 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9823 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9825 if (speed_p)
9826 *cost += (extra_cost->alu.arith
9827 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9828 ? extra_cost->alu.arith
9829 : extra_cost->alu.arith_shift));
9830 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9831 + rtx_cost (XEXP (XEXP (x, 1), 0),
9832 GET_CODE (XEXP (x, 1)), 0, speed_p));
9833 return true;
9836 if (speed_p)
9837 *cost += 2 * extra_cost->alu.arith;
9838 return false;
9841 /* Vector mode? */
9843 *cost = LIBCALL_COST (2);
9844 return false;
9846 case PLUS:
9847 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9848 && (mode == SFmode || !TARGET_VFP_SINGLE))
9850 *cost = COSTS_N_INSNS (1);
9851 if (GET_CODE (XEXP (x, 0)) == MULT)
9853 rtx mul_op0, mul_op1, add_op;
9855 if (speed_p)
9856 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9858 mul_op0 = XEXP (XEXP (x, 0), 0);
9859 mul_op1 = XEXP (XEXP (x, 0), 1);
9860 add_op = XEXP (x, 1);
9862 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9863 + rtx_cost (mul_op1, code, 0, speed_p)
9864 + rtx_cost (add_op, code, 0, speed_p));
9866 return true;
9869 if (speed_p)
9870 *cost += extra_cost->fp[mode != SFmode].addsub;
9871 return false;
9873 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9875 *cost = LIBCALL_COST (2);
9876 return false;
9879 /* Narrow modes can be synthesized in SImode, but the range
9880 of useful sub-operations is limited. Check for shift operations
9881 on one of the operands. Only left shifts can be used in the
9882 narrow modes. */
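/* For example (illustrative RTL), (plus:HI (ashift:HI (reg:HI r1)
   (const_int 2)) (reg:HI r2)) maps onto a single ADD with an LSL #2
   shifter operand computed in SImode, whereas a right shift of a
   narrow value cannot be folded the same way because the upper bits
   of the SImode register holding it are not defined. */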
9883 if (GET_MODE_CLASS (mode) == MODE_INT
9884 && GET_MODE_SIZE (mode) < 4)
9886 rtx shift_op, shift_reg;
9887 shift_reg = NULL;
9889 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9891 if (CONST_INT_P (XEXP (x, 1)))
9893 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9894 INTVAL (XEXP (x, 1)), NULL_RTX,
9895 NULL_RTX, 1, 0);
9896 *cost = COSTS_N_INSNS (insns);
9897 if (speed_p)
9898 *cost += insns * extra_cost->alu.arith;
9899 /* Slightly penalize a narrow operation as the result may
9900 need widening. */
9901 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9902 return true;
9905 /* Slightly penalize a narrow operation as the result may
9906 need widening. */
9907 *cost = 1 + COSTS_N_INSNS (1);
9908 if (speed_p)
9909 *cost += extra_cost->alu.arith;
9911 return false;
9914 if (mode == SImode)
9916 rtx shift_op, shift_reg;
9918 *cost = COSTS_N_INSNS (1);
9919 if (TARGET_INT_SIMD
9920 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9921 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9923 /* UXTA[BH] or SXTA[BH]. */
9924 if (speed_p)
9925 *cost += extra_cost->alu.extend_arith;
9926 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9927 speed_p)
9928 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9929 return true;
9932 shift_reg = NULL;
9933 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9934 if (shift_op != NULL)
9936 if (shift_reg)
9938 if (speed_p)
9939 *cost += extra_cost->alu.arith_shift_reg;
9940 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9942 else if (speed_p)
9943 *cost += extra_cost->alu.arith_shift;
9945 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9946 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9947 return true;
9949 if (GET_CODE (XEXP (x, 0)) == MULT)
9951 rtx mul_op = XEXP (x, 0);
9953 *cost = COSTS_N_INSNS (1);
9955 if (TARGET_DSP_MULTIPLY
9956 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9957 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9958 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9959 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9960 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9961 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9962 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9963 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9964 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9965 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9966 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9967 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9968 == 16))))))
9970 /* SMLA[BT][BT]. */
9971 if (speed_p)
9972 *cost += extra_cost->mult[0].extend_add;
9973 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9974 SIGN_EXTEND, 0, speed_p)
9975 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9976 SIGN_EXTEND, 0, speed_p)
9977 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9978 return true;
9981 if (speed_p)
9982 *cost += extra_cost->mult[0].add;
9983 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9984 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9985 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9986 return true;
9988 if (CONST_INT_P (XEXP (x, 1)))
9990 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9991 INTVAL (XEXP (x, 1)), NULL_RTX,
9992 NULL_RTX, 1, 0);
9993 *cost = COSTS_N_INSNS (insns);
9994 if (speed_p)
9995 *cost += insns * extra_cost->alu.arith;
9996 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9997 return true;
9999 else if (speed_p)
10000 *cost += extra_cost->alu.arith;
10002 return false;
10005 if (mode == DImode)
10007 if (arm_arch3m
10008 && GET_CODE (XEXP (x, 0)) == MULT
10009 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10010 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10011 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10012 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10014 *cost = COSTS_N_INSNS (1);
10015 if (speed_p)
10016 *cost += extra_cost->mult[1].extend_add;
10017 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10018 ZERO_EXTEND, 0, speed_p)
10019 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10020 ZERO_EXTEND, 0, speed_p)
10021 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10022 return true;
10025 *cost = COSTS_N_INSNS (2);
10027 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10028 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10030 if (speed_p)
10031 *cost += (extra_cost->alu.arith
10032 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10033 ? extra_cost->alu.arith
10034 : extra_cost->alu.arith_shift));
10036 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10037 speed_p)
10038 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10039 return true;
10042 if (speed_p)
10043 *cost += 2 * extra_cost->alu.arith;
10044 return false;
10047 /* Vector mode? */
10048 *cost = LIBCALL_COST (2);
10049 return false;
10050 case IOR:
10051 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10053 *cost = COSTS_N_INSNS (1);
10054 if (speed_p)
10055 *cost += extra_cost->alu.rev;
10057 return true;
10059 /* Fall through. */
10060 case AND: case XOR:
10061 if (mode == SImode)
10063 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10064 rtx op0 = XEXP (x, 0);
10065 rtx shift_op, shift_reg;
10067 *cost = COSTS_N_INSNS (1);
10069 if (subcode == NOT
10070 && (code == AND
10071 || (code == IOR && TARGET_THUMB2)))
10072 op0 = XEXP (op0, 0);
10074 shift_reg = NULL;
10075 shift_op = shifter_op_p (op0, &shift_reg);
10076 if (shift_op != NULL)
10078 if (shift_reg)
10080 if (speed_p)
10081 *cost += extra_cost->alu.log_shift_reg;
10082 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10084 else if (speed_p)
10085 *cost += extra_cost->alu.log_shift;
10087 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10088 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10089 return true;
10092 if (CONST_INT_P (XEXP (x, 1)))
10094 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10095 INTVAL (XEXP (x, 1)), NULL_RTX,
10096 NULL_RTX, 1, 0);
10098 *cost = COSTS_N_INSNS (insns);
10099 if (speed_p)
10100 *cost += insns * extra_cost->alu.logical;
10101 *cost += rtx_cost (op0, code, 0, speed_p);
10102 return true;
10105 if (speed_p)
10106 *cost += extra_cost->alu.logical;
10107 *cost += (rtx_cost (op0, code, 0, speed_p)
10108 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10109 return true;
10112 if (mode == DImode)
10114 rtx op0 = XEXP (x, 0);
10115 enum rtx_code subcode = GET_CODE (op0);
10117 *cost = COSTS_N_INSNS (2);
10119 if (subcode == NOT
10120 && (code == AND
10121 || (code == IOR && TARGET_THUMB2)))
10122 op0 = XEXP (op0, 0);
10124 if (GET_CODE (op0) == ZERO_EXTEND)
10126 if (speed_p)
10127 *cost += 2 * extra_cost->alu.logical;
10129 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10130 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10131 return true;
10133 else if (GET_CODE (op0) == SIGN_EXTEND)
10135 if (speed_p)
10136 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10138 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10139 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10140 return true;
10143 if (speed_p)
10144 *cost += 2 * extra_cost->alu.logical;
10146 return true;
10148 /* Vector mode? */
10150 *cost = LIBCALL_COST (2);
10151 return false;
10153 case MULT:
10154 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10155 && (mode == SFmode || !TARGET_VFP_SINGLE))
10157 rtx op0 = XEXP (x, 0);
10159 *cost = COSTS_N_INSNS (1);
10161 if (GET_CODE (op0) == NEG)
10162 op0 = XEXP (op0, 0);
10164 if (speed_p)
10165 *cost += extra_cost->fp[mode != SFmode].mult;
10167 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10168 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10169 return true;
10171 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10173 *cost = LIBCALL_COST (2);
10174 return false;
10177 if (mode == SImode)
10179 *cost = COSTS_N_INSNS (1);
10180 if (TARGET_DSP_MULTIPLY
10181 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10182 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10183 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10184 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10185 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10186 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10187 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10188 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10189 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10190 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10191 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10192 && (INTVAL (XEXP (XEXP (x, 1), 1))
10193 == 16))))))
10195 /* SMUL[TB][TB]. */
10196 if (speed_p)
10197 *cost += extra_cost->mult[0].extend;
10198 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10199 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10200 return true;
10202 if (speed_p)
10203 *cost += extra_cost->mult[0].simple;
10204 return false;
10207 if (mode == DImode)
10209 if (arm_arch3m
10210 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10211 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10212 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10213 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10215 *cost = COSTS_N_INSNS (1);
10216 if (speed_p)
10217 *cost += extra_cost->mult[1].extend;
10218 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10219 ZERO_EXTEND, 0, speed_p)
10220 + rtx_cost (XEXP (XEXP (x, 1), 0),
10221 ZERO_EXTEND, 0, speed_p));
10222 return true;
10225 *cost = LIBCALL_COST (2);
10226 return false;
10229 /* Vector mode? */
10230 *cost = LIBCALL_COST (2);
10231 return false;
10233 case NEG:
10234 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10235 && (mode == SFmode || !TARGET_VFP_SINGLE))
10237 *cost = COSTS_N_INSNS (1);
10238 if (speed_p)
10239 *cost += extra_cost->fp[mode != SFmode].neg;
10241 return false;
10243 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10245 *cost = LIBCALL_COST (1);
10246 return false;
10249 if (mode == SImode)
10251 if (GET_CODE (XEXP (x, 0)) == ABS)
10253 *cost = COSTS_N_INSNS (2);
10254 /* Assume the non-flag-changing variant. */
10255 if (speed_p)
10256 *cost += (extra_cost->alu.log_shift
10257 + extra_cost->alu.arith_shift);
10258 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10259 return true;
10262 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10263 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10265 *cost = COSTS_N_INSNS (2);
10266 /* No extra cost for MOV imm and MVN imm. */
10267 /* If the comparison op is using the flags, there's no further
10268 cost, otherwise we need to add the cost of the comparison. */
10269 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10270 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10271 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10273 *cost += (COSTS_N_INSNS (1)
10274 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10275 speed_p)
10276 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10277 speed_p));
10278 if (speed_p)
10279 *cost += extra_cost->alu.arith;
10281 return true;
10283 *cost = COSTS_N_INSNS (1);
10284 if (speed_p)
10285 *cost += extra_cost->alu.arith;
10286 return false;
10289 if (GET_MODE_CLASS (mode) == MODE_INT
10290 && GET_MODE_SIZE (mode) < 4)
10292 /* Slightly disparage, as we might need an extend operation. */
10293 *cost = 1 + COSTS_N_INSNS (1);
10294 if (speed_p)
10295 *cost += extra_cost->alu.arith;
10296 return false;
10299 if (mode == DImode)
10301 *cost = COSTS_N_INSNS (2);
10302 if (speed_p)
10303 *cost += 2 * extra_cost->alu.arith;
10304 return false;
10307 /* Vector mode? */
10308 *cost = LIBCALL_COST (1);
10309 return false;
10311 case NOT:
10312 if (mode == SImode)
10314 rtx shift_op;
10315 rtx shift_reg = NULL;
10317 *cost = COSTS_N_INSNS (1);
10318 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10320 if (shift_op)
10322 if (shift_reg != NULL)
10324 if (speed_p)
10325 *cost += extra_cost->alu.log_shift_reg;
10326 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10328 else if (speed_p)
10329 *cost += extra_cost->alu.log_shift;
10330 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10331 return true;
10334 if (speed_p)
10335 *cost += extra_cost->alu.logical;
10336 return false;
10338 if (mode == DImode)
10340 *cost = COSTS_N_INSNS (2);
10341 return false;
10344 /* Vector mode? */
10346 *cost += LIBCALL_COST (1);
10347 return false;
10349 case IF_THEN_ELSE:
10351 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10353 *cost = COSTS_N_INSNS (4);
10354 return true;
10356 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10357 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10359 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10360 /* Assume that if one arm of the if_then_else is a register,
10361 that it will be tied with the result and eliminate the
10362 conditional insn. */
10363 if (REG_P (XEXP (x, 1)))
10364 *cost += op2cost;
10365 else if (REG_P (XEXP (x, 2)))
10366 *cost += op1cost;
10367 else
10369 if (speed_p)
10371 if (extra_cost->alu.non_exec_costs_exec)
10372 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10373 else
10374 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10376 else
10377 *cost += op1cost + op2cost;
10380 return true;
10382 case COMPARE:
10383 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10384 *cost = 0;
10385 else
10387 machine_mode op0mode;
10388 /* We'll mostly assume that the cost of a compare is the cost of the
10389 LHS. However, there are some notable exceptions. */
10391 /* Floating point compares are never done as side-effects. */
10392 op0mode = GET_MODE (XEXP (x, 0));
10393 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10394 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10396 *cost = COSTS_N_INSNS (1);
10397 if (speed_p)
10398 *cost += extra_cost->fp[op0mode != SFmode].compare;
10400 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10402 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10403 return true;
10406 return false;
10408 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10410 *cost = LIBCALL_COST (2);
10411 return false;
10414 /* DImode compares normally take two insns. */
10415 if (op0mode == DImode)
10417 *cost = COSTS_N_INSNS (2);
10418 if (speed_p)
10419 *cost += 2 * extra_cost->alu.arith;
10420 return false;
10423 if (op0mode == SImode)
10425 rtx shift_op;
10426 rtx shift_reg;
10428 if (XEXP (x, 1) == const0_rtx
10429 && !(REG_P (XEXP (x, 0))
10430 || (GET_CODE (XEXP (x, 0)) == SUBREG
10431 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10433 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10435 /* Multiply operations that set the flags are often
10436 significantly more expensive. */
10437 if (speed_p
10438 && GET_CODE (XEXP (x, 0)) == MULT
10439 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10440 *cost += extra_cost->mult[0].flag_setting;
10442 if (speed_p
10443 && GET_CODE (XEXP (x, 0)) == PLUS
10444 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10445 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10446 0), 1), mode))
10447 *cost += extra_cost->mult[0].flag_setting;
10448 return true;
10451 shift_reg = NULL;
10452 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10453 if (shift_op != NULL)
10455 *cost = COSTS_N_INSNS (1);
10456 if (shift_reg != NULL)
10458 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10459 if (speed_p)
10460 *cost += extra_cost->alu.arith_shift_reg;
10462 else if (speed_p)
10463 *cost += extra_cost->alu.arith_shift;
10464 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10465 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10466 return true;
10469 *cost = COSTS_N_INSNS (1);
10470 if (speed_p)
10471 *cost += extra_cost->alu.arith;
10472 if (CONST_INT_P (XEXP (x, 1))
10473 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10475 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10476 return true;
10478 return false;
10481 /* Vector mode? */
10483 *cost = LIBCALL_COST (2);
10484 return false;
10486 return true;
10488 case EQ:
10489 case NE:
10490 case LT:
10491 case LE:
10492 case GT:
10493 case GE:
10494 case LTU:
10495 case LEU:
10496 case GEU:
10497 case GTU:
10498 case ORDERED:
10499 case UNORDERED:
10500 case UNEQ:
10501 case UNLE:
10502 case UNLT:
10503 case UNGE:
10504 case UNGT:
10505 case LTGT:
10506 if (outer_code == SET)
10508 /* Is it a store-flag operation? */
10509 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10510 && XEXP (x, 1) == const0_rtx)
10512 /* Thumb also needs an IT insn. */
10513 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10514 return true;
10516 if (XEXP (x, 1) == const0_rtx)
10518 switch (code)
10520 case LT:
10521 /* LSR Rd, Rn, #31. */
10522 *cost = COSTS_N_INSNS (1);
10523 if (speed_p)
10524 *cost += extra_cost->alu.shift;
10525 break;
10527 case EQ:
10528 /* RSBS T1, Rn, #0
10529 ADC Rd, Rn, T1. */
10531 case NE:
10532 /* SUBS T1, Rn, #1
10533 SBC Rd, Rn, T1. */
10534 *cost = COSTS_N_INSNS (2);
10535 break;
10537 case LE:
10538 /* RSBS T1, Rn, Rn, LSR #31
10539 ADC Rd, Rn, T1. */
10540 *cost = COSTS_N_INSNS (2);
10541 if (speed_p)
10542 *cost += extra_cost->alu.arith_shift;
10543 break;
10545 case GT:
10546 /* RSB Rd, Rn, Rn, ASR #1
10547 LSR Rd, Rd, #31. */
10548 *cost = COSTS_N_INSNS (2);
10549 if (speed_p)
10550 *cost += (extra_cost->alu.arith_shift
10551 + extra_cost->alu.shift);
10552 break;
10554 case GE:
10555 /* ASR Rd, Rn, #31
10556 ADD Rd, Rn, #1. */
10557 *cost = COSTS_N_INSNS (2);
10558 if (speed_p)
10559 *cost += extra_cost->alu.shift;
10560 break;
10562 default:
10563 /* Remaining cases are either meaningless or would take
10564 three insns anyway. */
10565 *cost = COSTS_N_INSNS (3);
10566 break;
10568 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10569 return true;
10571 else
10573 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10574 if (CONST_INT_P (XEXP (x, 1))
10575 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10577 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10578 return true;
10581 return false;
10584 /* Not directly inside a set. If it involves the condition code
10585 register it must be the condition for a branch, cond_exec or
10586 I_T_E operation. Since the comparison is performed elsewhere
10587 this is just the control part which has no additional
10588 cost. */
10589 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10590 && XEXP (x, 1) == const0_rtx)
10592 *cost = 0;
10593 return true;
10595 return false;
10597 case ABS:
10598 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10599 && (mode == SFmode || !TARGET_VFP_SINGLE))
10601 *cost = COSTS_N_INSNS (1);
10602 if (speed_p)
10603 *cost += extra_cost->fp[mode != SFmode].neg;
10605 return false;
10607 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10609 *cost = LIBCALL_COST (1);
10610 return false;
10613 if (mode == SImode)
10615 *cost = COSTS_N_INSNS (1);
10616 if (speed_p)
10617 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10618 return false;
10620 /* Vector mode? */
10621 *cost = LIBCALL_COST (1);
10622 return false;
10624 case SIGN_EXTEND:
10625 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10626 && MEM_P (XEXP (x, 0)))
10628 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10630 if (mode == DImode)
10631 *cost += COSTS_N_INSNS (1);
10633 if (!speed_p)
10634 return true;
10636 if (GET_MODE (XEXP (x, 0)) == SImode)
10637 *cost += extra_cost->ldst.load;
10638 else
10639 *cost += extra_cost->ldst.load_sign_extend;
10641 if (mode == DImode)
10642 *cost += extra_cost->alu.shift;
10644 return true;
10647 /* Widening from less than 32-bits requires an extend operation. */
10648 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10650 /* We have SXTB/SXTH. */
10651 *cost = COSTS_N_INSNS (1);
10652 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10653 if (speed_p)
10654 *cost += extra_cost->alu.extend;
10656 else if (GET_MODE (XEXP (x, 0)) != SImode)
10658 /* Needs two shifts. */
10659 *cost = COSTS_N_INSNS (2);
10660 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10661 if (speed_p)
10662 *cost += 2 * extra_cost->alu.shift;
10665 /* Widening beyond 32-bits requires one more insn. */
10666 if (mode == DImode)
10668 *cost += COSTS_N_INSNS (1);
10669 if (speed_p)
10670 *cost += extra_cost->alu.shift;
10673 return true;
10675 case ZERO_EXTEND:
10676 if ((arm_arch4
10677 || GET_MODE (XEXP (x, 0)) == SImode
10678 || GET_MODE (XEXP (x, 0)) == QImode)
10679 && MEM_P (XEXP (x, 0)))
10681 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10683 if (mode == DImode)
10684 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10686 return true;
10689 /* Widening from less than 32-bits requires an extend operation. */
10690 if (GET_MODE (XEXP (x, 0)) == QImode)
10692 /* UXTB can be a shorter instruction in Thumb2, but it might
10693 be slower than the AND Rd, Rn, #255 alternative. When
10694 optimizing for speed it should never be slower to use
10695 AND, and we don't really model 16-bit vs 32-bit insns
10696 here. */
10697 *cost = COSTS_N_INSNS (1);
10698 if (speed_p)
10699 *cost += extra_cost->alu.logical;
10701 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10703 /* We have UXTB/UXTH. */
10704 *cost = COSTS_N_INSNS (1);
10705 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10706 if (speed_p)
10707 *cost += extra_cost->alu.extend;
10709 else if (GET_MODE (XEXP (x, 0)) != SImode)
10711 /* Needs two shifts. It's marginally preferable to use
10712 shifts rather than two BIC instructions as the second
10713 shift may merge with a subsequent insn as a shifter
10714 op. */
10715 *cost = COSTS_N_INSNS (2);
10716 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10717 if (speed_p)
10718 *cost += 2 * extra_cost->alu.shift;
10720 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10721 *cost = COSTS_N_INSNS (1);
10723 /* Widening beyond 32-bits requires one more insn. */
10724 if (mode == DImode)
10726 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10729 return true;
10731 case CONST_INT:
10732 *cost = 0;
10733 /* CONST_INT has no mode, so we cannot tell for sure how many
10734 insns are really going to be needed. The best we can do is
10735 look at the value passed. If it fits in SImode, then assume
10736 that's the mode it will be used for. Otherwise assume it
10737 will be used in DImode. */
10738 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10739 mode = SImode;
10740 else
10741 mode = DImode;
10743 /* Avoid blowing up in arm_gen_constant (). */
10744 if (!(outer_code == PLUS
10745 || outer_code == AND
10746 || outer_code == IOR
10747 || outer_code == XOR
10748 || outer_code == MINUS))
10749 outer_code = SET;
10751 const_int_cost:
10752 if (mode == SImode)
10754 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10755 INTVAL (x), NULL, NULL,
10756 0, 0));
10757 /* Extra costs? */
10759 else
10761 *cost += COSTS_N_INSNS (arm_gen_constant
10762 (outer_code, SImode, NULL,
10763 trunc_int_for_mode (INTVAL (x), SImode),
10764 NULL, NULL, 0, 0)
10765 + arm_gen_constant (outer_code, SImode, NULL,
10766 INTVAL (x) >> 32, NULL,
10767 NULL, 0, 0));
10768 /* Extra costs? */
10771 return true;
10773 case CONST:
10774 case LABEL_REF:
10775 case SYMBOL_REF:
10776 if (speed_p)
10778 if (arm_arch_thumb2 && !flag_pic)
10779 *cost = COSTS_N_INSNS (2);
10780 else
10781 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10783 else
10784 *cost = COSTS_N_INSNS (2);
10786 if (flag_pic)
10788 *cost += COSTS_N_INSNS (1);
10789 if (speed_p)
10790 *cost += extra_cost->alu.arith;
10793 return true;
10795 case CONST_FIXED:
10796 *cost = COSTS_N_INSNS (4);
10797 /* Fixme. */
10798 return true;
10800 case CONST_DOUBLE:
10801 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10802 && (mode == SFmode || !TARGET_VFP_SINGLE))
10804 if (vfp3_const_double_rtx (x))
10806 *cost = COSTS_N_INSNS (1);
10807 if (speed_p)
10808 *cost += extra_cost->fp[mode == DFmode].fpconst;
10809 return true;
10812 if (speed_p)
10814 *cost = COSTS_N_INSNS (1);
10815 if (mode == DFmode)
10816 *cost += extra_cost->ldst.loadd;
10817 else
10818 *cost += extra_cost->ldst.loadf;
10820 else
10821 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10823 return true;
10825 *cost = COSTS_N_INSNS (4);
10826 return true;
10828 case CONST_VECTOR:
10829 /* Fixme. */
10830 if (TARGET_NEON
10831 && TARGET_HARD_FLOAT
10832 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10833 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10834 *cost = COSTS_N_INSNS (1);
10835 else
10836 *cost = COSTS_N_INSNS (4);
10837 return true;
10839 case HIGH:
10840 case LO_SUM:
10841 *cost = COSTS_N_INSNS (1);
10842 /* When optimizing for size, we prefer constant pool entries to
10843 MOVW/MOVT pairs, so bump the cost of these slightly. */
10844 if (!speed_p)
10845 *cost += 1;
10846 return true;
10848 case CLZ:
10849 *cost = COSTS_N_INSNS (1);
10850 if (speed_p)
10851 *cost += extra_cost->alu.clz;
10852 return false;
10854 case SMIN:
10855 if (XEXP (x, 1) == const0_rtx)
10857 *cost = COSTS_N_INSNS (1);
10858 if (speed_p)
10859 *cost += extra_cost->alu.log_shift;
10860 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10861 return true;
10863 /* Fall through. */
10864 case SMAX:
10865 case UMIN:
10866 case UMAX:
10867 *cost = COSTS_N_INSNS (2);
10868 return false;
10870 case TRUNCATE:
10871 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10872 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10873 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10874 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10875 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10876 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10877 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10878 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10879 == ZERO_EXTEND))))
10881 *cost = COSTS_N_INSNS (1);
10882 if (speed_p)
10883 *cost += extra_cost->mult[1].extend;
10884 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10885 speed_p)
10886 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10887 0, speed_p));
10888 return true;
10890 *cost = LIBCALL_COST (1);
10891 return false;
10893 case UNSPEC:
10894 return arm_unspec_cost (x, outer_code, speed_p, cost);
10896 case PC:
10897 /* Reading the PC is like reading any other register. Writing it
10898 is more expensive, but we take that into account elsewhere. */
10899 *cost = 0;
10900 return true;
10902 case ZERO_EXTRACT:
10903 /* TODO: Simple zero_extract of bottom bits using AND. */
10904 /* Fall through. */
10905 case SIGN_EXTRACT:
10906 if (arm_arch6
10907 && mode == SImode
10908 && CONST_INT_P (XEXP (x, 1))
10909 && CONST_INT_P (XEXP (x, 2)))
10911 *cost = COSTS_N_INSNS (1);
10912 if (speed_p)
10913 *cost += extra_cost->alu.bfx;
10914 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10915 return true;
10917 /* Without UBFX/SBFX, need to resort to shift operations. */
10918 *cost = COSTS_N_INSNS (2);
10919 if (speed_p)
10920 *cost += 2 * extra_cost->alu.shift;
10921 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10922 return true;
10924 case FLOAT_EXTEND:
10925 if (TARGET_HARD_FLOAT)
10927 *cost = COSTS_N_INSNS (1);
10928 if (speed_p)
10929 *cost += extra_cost->fp[mode == DFmode].widen;
10930 if (!TARGET_FPU_ARMV8
10931 && GET_MODE (XEXP (x, 0)) == HFmode)
10933 /* Pre v8, widening HF->DF is a two-step process, first
10934 widening to SFmode. */
10935 *cost += COSTS_N_INSNS (1);
10936 if (speed_p)
10937 *cost += extra_cost->fp[0].widen;
10939 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10940 return true;
10943 *cost = LIBCALL_COST (1);
10944 return false;
10946 case FLOAT_TRUNCATE:
10947 if (TARGET_HARD_FLOAT)
10949 *cost = COSTS_N_INSNS (1);
10950 if (speed_p)
10951 *cost += extra_cost->fp[mode == DFmode].narrow;
10952 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10953 return true;
10954 /* Vector modes? */
10956 *cost = LIBCALL_COST (1);
10957 return false;
10959 case FMA:
10960 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10962 rtx op0 = XEXP (x, 0);
10963 rtx op1 = XEXP (x, 1);
10964 rtx op2 = XEXP (x, 2);
10966 *cost = COSTS_N_INSNS (1);
10968 /* vfms or vfnma. */
10969 if (GET_CODE (op0) == NEG)
10970 op0 = XEXP (op0, 0);
10972 /* vfnms or vfnma. */
10973 if (GET_CODE (op2) == NEG)
10974 op2 = XEXP (op2, 0);
10976 *cost += rtx_cost (op0, FMA, 0, speed_p);
10977 *cost += rtx_cost (op1, FMA, 1, speed_p);
10978 *cost += rtx_cost (op2, FMA, 2, speed_p);
10980 if (speed_p)
10981 *cost += extra_cost->fp[mode == DFmode].fma;
10983 return true;
10986 *cost = LIBCALL_COST (3);
10987 return false;
10989 case FIX:
10990 case UNSIGNED_FIX:
10991 if (TARGET_HARD_FLOAT)
10993 if (GET_MODE_CLASS (mode) == MODE_INT)
10995 *cost = COSTS_N_INSNS (1);
10996 if (speed_p)
10997 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10998 /* Strip off the 'cost' of rounding towards zero. */
10999 if (GET_CODE (XEXP (x, 0)) == FIX)
11000 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11001 else
11002 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11003 /* ??? Increase the cost to deal with transferring from
11004 FP -> CORE registers? */
11005 return true;
11007 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11008 && TARGET_FPU_ARMV8)
11010 *cost = COSTS_N_INSNS (1);
11011 if (speed_p)
11012 *cost += extra_cost->fp[mode == DFmode].roundint;
11013 return false;
11015 /* Vector costs? */
11017 *cost = LIBCALL_COST (1);
11018 return false;
11020 case FLOAT:
11021 case UNSIGNED_FLOAT:
11022 if (TARGET_HARD_FLOAT)
11024 /* ??? Increase the cost to deal with transferring from CORE
11025 -> FP registers? */
11026 *cost = COSTS_N_INSNS (1);
11027 if (speed_p)
11028 *cost += extra_cost->fp[mode == DFmode].fromint;
11029 return false;
11031 *cost = LIBCALL_COST (1);
11032 return false;
11034 case CALL:
11035 *cost = COSTS_N_INSNS (1);
11036 return true;
11038 case ASM_OPERANDS:
11040 /* Just a guess. Guess number of instructions in the asm
11041 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11042 though (see PR60663). */
11043 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11044 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11046 *cost = COSTS_N_INSNS (asm_length + num_operands);
11047 return true;
11049 default:
11050 if (mode != VOIDmode)
11051 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11052 else
11053 *cost = COSTS_N_INSNS (4); /* Who knows? */
11054 return false;
11058 #undef HANDLE_NARROW_SHIFT_ARITH
11060 /* RTX costs. Dispatch to the appropriate size or tuning-specific speed implementation. */
11061 static bool
11062 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11063 int *total, bool speed)
11065 bool result;
11067 if (TARGET_OLD_RTX_COSTS
11068 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11070 /* Old way. (Deprecated.) */
11071 if (!speed)
11072 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11073 (enum rtx_code) outer_code, total);
11074 else
11075 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11076 (enum rtx_code) outer_code, total,
11077 speed);
11079 else
11081 /* New way. */
11082 if (current_tune->insn_extra_cost)
11083 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11084 (enum rtx_code) outer_code,
11085 current_tune->insn_extra_cost,
11086 total, speed);
11087 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11088 && current_tune->insn_extra_cost == NULL. */
11089 else
11090 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11091 (enum rtx_code) outer_code,
11092 &generic_extra_costs, total, speed);
11095 if (dump_file && (dump_flags & TDF_DETAILS))
11097 print_rtl_single (dump_file, x);
11098 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11099 *total, result ? "final" : "partial");
11101 return result;
11104 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11105 supported on any "slowmul" cores, so it can be ignored. */
11107 static bool
11108 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11109 int *total, bool speed)
11111 machine_mode mode = GET_MODE (x);
11113 if (TARGET_THUMB)
11115 *total = thumb1_rtx_costs (x, code, outer_code);
11116 return true;
11119 switch (code)
11121 case MULT:
11122 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11123 || mode == DImode)
11125 *total = COSTS_N_INSNS (20);
11126 return false;
11129 if (CONST_INT_P (XEXP (x, 1)))
11131 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11132 & (unsigned HOST_WIDE_INT) 0xffffffff);
11133 int cost, const_ok = const_ok_for_arm (i);
11134 int j, booth_unit_size;
11136 /* Tune as appropriate. */
11137 cost = const_ok ? 4 : 8;
11138 booth_unit_size = 2;
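/* For illustration: with booth_unit_size == 2 the loop below scans the
   constant two bits at a time, so a multiplier of 0x350 (ten
   significant bits, directly encodable as an ARM immediate) is costed
   as the base of 4 plus five iterations, i.e. COSTS_N_INSNS (9). */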
11139 for (j = 0; i && j < 32; j += booth_unit_size)
11141 i >>= booth_unit_size;
11142 cost++;
11145 *total = COSTS_N_INSNS (cost);
11146 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11147 return true;
11150 *total = COSTS_N_INSNS (20);
11151 return false;
11153 default:
11154 return arm_rtx_costs_1 (x, outer_code, total, speed);
11159 /* RTX cost for cores with a fast multiply unit (M variants). */
11161 static bool
11162 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11163 int *total, bool speed)
11165 machine_mode mode = GET_MODE (x);
11167 if (TARGET_THUMB1)
11169 *total = thumb1_rtx_costs (x, code, outer_code);
11170 return true;
11173 /* ??? Should Thumb-2 use different costs? */
11174 switch (code)
11176 case MULT:
11177 /* There is no point basing this on the tuning, since it is always the
11178 fast variant if it exists at all. */
11179 if (mode == DImode
11180 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11181 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11182 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11184 *total = COSTS_N_INSNS (2);
11185 return false;
11189 if (mode == DImode)
11191 *total = COSTS_N_INSNS (5);
11192 return false;
11195 if (CONST_INT_P (XEXP (x, 1)))
11197 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11198 & (unsigned HOST_WIDE_INT) 0xffffffff);
11199 int cost, const_ok = const_ok_for_arm (i);
11200 int j, booth_unit_size;
11202 /* Tune as appropriate. */
11203 cost = const_ok ? 4 : 8;
11204 booth_unit_size = 8;
11205 for (j = 0; i && j < 32; j += booth_unit_size)
11207 i >>= booth_unit_size;
11208 cost++;
11211 *total = COSTS_N_INSNS (cost);
11212 return false;
11215 if (mode == SImode)
11217 *total = COSTS_N_INSNS (4);
11218 return false;
11221 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11223 if (TARGET_HARD_FLOAT
11224 && (mode == SFmode
11225 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11227 *total = COSTS_N_INSNS (1);
11228 return false;
11232 /* Requires a lib call. */
11233 *total = COSTS_N_INSNS (20);
11234 return false;
11236 default:
11237 return arm_rtx_costs_1 (x, outer_code, total, speed);
11242 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11243 so it can be ignored. */
11245 static bool
11246 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11247 int *total, bool speed)
11249 machine_mode mode = GET_MODE (x);
11251 if (TARGET_THUMB)
11253 *total = thumb1_rtx_costs (x, code, outer_code);
11254 return true;
11257 switch (code)
11259 case COMPARE:
11260 if (GET_CODE (XEXP (x, 0)) != MULT)
11261 return arm_rtx_costs_1 (x, outer_code, total, speed);
11263 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11264 will stall until the multiplication is complete. */
11265 *total = COSTS_N_INSNS (3);
11266 return false;
11268 case MULT:
11269 /* There is no point basing this on the tuning, since it is always the
11270 fast variant if it exists at all. */
11271 if (mode == DImode
11272 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11273 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11274 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11276 *total = COSTS_N_INSNS (2);
11277 return false;
11281 if (mode == DImode)
11283 *total = COSTS_N_INSNS (5);
11284 return false;
11287 if (CONST_INT_P (XEXP (x, 1)))
11289 /* If operand 1 is a constant we can more accurately
11290 calculate the cost of the multiply. The multiplier can
11291 retire 15 bits on the first cycle and a further 12 on the
11292 second. We do, of course, have to load the constant into
11293 a register first. */
11294 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11295 /* There's a general overhead of one cycle. */
11296 int cost = 1;
11297 unsigned HOST_WIDE_INT masked_const;
11299 if (i & 0x80000000)
11300 i = ~i;
11302 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11304 masked_const = i & 0xffff8000;
11305 if (masked_const != 0)
11307 cost++;
11308 masked_const = i & 0xf8000000;
11309 if (masked_const != 0)
11310 cost++;
11312 *total = COSTS_N_INSNS (cost);
11313 return false;
11316 if (mode == SImode)
11318 *total = COSTS_N_INSNS (3);
11319 return false;
11322 /* Requires a lib call. */
11323 *total = COSTS_N_INSNS (20);
11324 return false;
11326 default:
11327 return arm_rtx_costs_1 (x, outer_code, total, speed);
11332 /* RTX costs for 9e (and later) cores. */
11334 static bool
11335 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11336 int *total, bool speed)
11338 machine_mode mode = GET_MODE (x);
11340 if (TARGET_THUMB1)
11342 switch (code)
11344 case MULT:
11345 /* Small multiply: 32 cycles for an integer multiply inst. */
11346 if (arm_arch6m && arm_m_profile_small_mul)
11347 *total = COSTS_N_INSNS (32);
11348 else
11349 *total = COSTS_N_INSNS (3);
11350 return true;
11352 default:
11353 *total = thumb1_rtx_costs (x, code, outer_code);
11354 return true;
11358 switch (code)
11360 case MULT:
11361 /* There is no point basing this on the tuning, since it is always the
11362 fast variant if it exists at all. */
11363 if (mode == DImode
11364 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11365 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11366 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11368 *total = COSTS_N_INSNS (2);
11369 return false;
11373 if (mode == DImode)
11375 *total = COSTS_N_INSNS (5);
11376 return false;
11379 if (mode == SImode)
11381 *total = COSTS_N_INSNS (2);
11382 return false;
11385 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11387 if (TARGET_HARD_FLOAT
11388 && (mode == SFmode
11389 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11391 *total = COSTS_N_INSNS (1);
11392 return false;
11396 *total = COSTS_N_INSNS (20);
11397 return false;
11399 default:
11400 return arm_rtx_costs_1 (x, outer_code, total, speed);
11403 /* All address computations that can be done are free, but rtx cost returns
11404 the same for practically all of them. So we weight the different types
11405 of address here in the order (most pref first):
11406 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
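/* For illustration, the weights below give:
     (post_inc:SI (reg))              -> 0   (most preferred)
     (plus:SI (reg) (const_int 4))    -> 2
     (plus:SI (reg) (mult ...))       -> 3
     (plus:SI (reg) (reg))            -> 4
     (reg)                            -> 6
     (symbol_ref ...)                 -> 10  (least preferred). */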
11407 static inline int
11408 arm_arm_address_cost (rtx x)
11410 enum rtx_code c = GET_CODE (x);
11412 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11413 return 0;
11414 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11415 return 10;
11417 if (c == PLUS)
11419 if (CONST_INT_P (XEXP (x, 1)))
11420 return 2;
11422 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11423 return 3;
11425 return 4;
11428 return 6;
11431 static inline int
11432 arm_thumb_address_cost (rtx x)
11434 enum rtx_code c = GET_CODE (x);
11436 if (c == REG)
11437 return 1;
11438 if (c == PLUS
11439 && REG_P (XEXP (x, 0))
11440 && CONST_INT_P (XEXP (x, 1)))
11441 return 1;
11443 return 2;
11446 static int
11447 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11448 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11450 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11453 /* Adjust cost hook for XScale. */
11454 static bool
11455 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11457 /* Some true dependencies can have a higher cost depending
11458 on precisely how certain input operands are used. */
11459 if (REG_NOTE_KIND (link) == 0
11460 && recog_memoized (insn) >= 0
11461 && recog_memoized (dep) >= 0)
11463 int shift_opnum = get_attr_shift (insn);
11464 enum attr_type attr_type = get_attr_type (dep);
11466 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11467 operand for INSN. If we have a shifted input operand and the
11468 instruction we depend on is another ALU instruction, then we may
11469 have to account for an additional stall. */
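/* For example (hypothetical insns):

     I1:  add  r2, r3, r4           ; ALU result
     I2:  add  r0, r1, r2, lsl #3   ; consumes r2 as the shifted operand

   On XScale the shifted operand of I2 is required earlier in the
   pipeline than an ordinary register operand, so this true dependency
   incurs one extra stall cycle. */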
11470 if (shift_opnum != 0
11471 && (attr_type == TYPE_ALU_SHIFT_IMM
11472 || attr_type == TYPE_ALUS_SHIFT_IMM
11473 || attr_type == TYPE_LOGIC_SHIFT_IMM
11474 || attr_type == TYPE_LOGICS_SHIFT_IMM
11475 || attr_type == TYPE_ALU_SHIFT_REG
11476 || attr_type == TYPE_ALUS_SHIFT_REG
11477 || attr_type == TYPE_LOGIC_SHIFT_REG
11478 || attr_type == TYPE_LOGICS_SHIFT_REG
11479 || attr_type == TYPE_MOV_SHIFT
11480 || attr_type == TYPE_MVN_SHIFT
11481 || attr_type == TYPE_MOV_SHIFT_REG
11482 || attr_type == TYPE_MVN_SHIFT_REG))
11484 rtx shifted_operand;
11485 int opno;
11487 /* Get the shifted operand. */
11488 extract_insn (insn);
11489 shifted_operand = recog_data.operand[shift_opnum];
11491 /* Iterate over all the operands in DEP. If we write an operand
11492 that overlaps with SHIFTED_OPERAND, then we have to increase the
11493 cost of this dependency. */
11494 extract_insn (dep);
11495 preprocess_constraints (dep);
11496 for (opno = 0; opno < recog_data.n_operands; opno++)
11498 /* We can ignore strict inputs. */
11499 if (recog_data.operand_type[opno] == OP_IN)
11500 continue;
11502 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11503 shifted_operand))
11505 *cost = 2;
11506 return false;
11511 return true;
11514 /* Adjust cost hook for Cortex A9. */
11515 static bool
11516 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11518 switch (REG_NOTE_KIND (link))
11520 case REG_DEP_ANTI:
11521 *cost = 0;
11522 return false;
11524 case REG_DEP_TRUE:
11525 case REG_DEP_OUTPUT:
11526 if (recog_memoized (insn) >= 0
11527 && recog_memoized (dep) >= 0)
11529 if (GET_CODE (PATTERN (insn)) == SET)
11531 if (GET_MODE_CLASS
11532 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11533 || GET_MODE_CLASS
11534 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11536 enum attr_type attr_type_insn = get_attr_type (insn);
11537 enum attr_type attr_type_dep = get_attr_type (dep);
11539 /* By default all dependencies of the form
11540 s0 = s0 <op> s1
11541 s0 = s0 <op> s2
11542 have an extra latency of 1 cycle because
11543 of the input and output dependency in this
11544 case. However, this gets modeled as a true
11545 dependency and hence all these checks. */
11546 if (REG_P (SET_DEST (PATTERN (insn)))
11547 && REG_P (SET_DEST (PATTERN (dep)))
11548 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11549 SET_DEST (PATTERN (dep))))
11551 /* FMACS is a special case where the dependent
11552 instruction can be issued 3 cycles before
11553 the normal latency in case of an output
11554 dependency. */
11555 if ((attr_type_insn == TYPE_FMACS
11556 || attr_type_insn == TYPE_FMACD)
11557 && (attr_type_dep == TYPE_FMACS
11558 || attr_type_dep == TYPE_FMACD))
11560 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11561 *cost = insn_default_latency (dep) - 3;
11562 else
11563 *cost = insn_default_latency (dep);
11564 return false;
11566 else
11568 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11569 *cost = insn_default_latency (dep) + 1;
11570 else
11571 *cost = insn_default_latency (dep);
11573 return false;
11578 break;
11580 default:
11581 gcc_unreachable ();
11584 return true;
11587 /* Adjust cost hook for FA726TE. */
11588 static bool
11589 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11591 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11592 by a predicated one) has a penalty of 3. */
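/* For example, a flag-setting CMP followed by a predicated ADDNE (a
   flag consumer that is not a branch) gets the 3-cycle latency below,
   whereas a conditional branch after the same CMP is costed as zero
   here. */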
11593 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11594 && recog_memoized (insn) >= 0
11595 && recog_memoized (dep) >= 0
11596 && get_attr_conds (dep) == CONDS_SET)
11598 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11599 if (get_attr_conds (insn) == CONDS_USE
11600 && get_attr_type (insn) != TYPE_BRANCH)
11602 *cost = 3;
11603 return false;
11606 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11607 || get_attr_conds (insn) == CONDS_USE)
11609 *cost = 0;
11610 return false;
11614 return true;
11617 /* Implement TARGET_REGISTER_MOVE_COST.
11619 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11620 it is typically more expensive than a single memory access. We set
11621 the cost to less than two memory accesses so that floating
11622 point to integer conversion does not go through memory. */
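/* For example, with the 32-bit costs below a VFP<->core transfer is 15,
   while a round trip through memory would be two accesses of cost 10
   each (see arm_memory_move_cost), so register allocation prefers the
   direct move. */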
11625 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11626 reg_class_t from, reg_class_t to)
11628 if (TARGET_32BIT)
11630 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11631 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11632 return 15;
11633 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11634 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11635 return 4;
11636 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11637 return 20;
11638 else
11639 return 2;
11641 else
11643 if (from == HI_REGS || to == HI_REGS)
11644 return 4;
11645 else
11646 return 2;
11650 /* Implement TARGET_MEMORY_MOVE_COST. */
11653 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11654 bool in ATTRIBUTE_UNUSED)
11656 if (TARGET_32BIT)
11657 return 10;
11658 else
11660 if (GET_MODE_SIZE (mode) < 4)
11661 return 8;
11662 else
11663 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11667 /* Vectorizer cost model implementation. */
11669 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11670 static int
11671 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11672 tree vectype,
11673 int misalign ATTRIBUTE_UNUSED)
11675 unsigned elements;
11677 switch (type_of_cost)
11679 case scalar_stmt:
11680 return current_tune->vec_costs->scalar_stmt_cost;
11682 case scalar_load:
11683 return current_tune->vec_costs->scalar_load_cost;
11685 case scalar_store:
11686 return current_tune->vec_costs->scalar_store_cost;
11688 case vector_stmt:
11689 return current_tune->vec_costs->vec_stmt_cost;
11691 case vector_load:
11692 return current_tune->vec_costs->vec_align_load_cost;
11694 case vector_store:
11695 return current_tune->vec_costs->vec_store_cost;
11697 case vec_to_scalar:
11698 return current_tune->vec_costs->vec_to_scalar_cost;
11700 case scalar_to_vec:
11701 return current_tune->vec_costs->scalar_to_vec_cost;
11703 case unaligned_load:
11704 return current_tune->vec_costs->vec_unalign_load_cost;
11706 case unaligned_store:
11707 return current_tune->vec_costs->vec_unalign_store_cost;
11709 case cond_branch_taken:
11710 return current_tune->vec_costs->cond_taken_branch_cost;
11712 case cond_branch_not_taken:
11713 return current_tune->vec_costs->cond_not_taken_branch_cost;
11715 case vec_perm:
11716 case vec_promote_demote:
11717 return current_tune->vec_costs->vec_stmt_cost;
11719 case vec_construct:
11720 elements = TYPE_VECTOR_SUBPARTS (vectype);
11721 return elements / 2 + 1;
11723 default:
11724 gcc_unreachable ();
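/* Worked example (vector type chosen for illustration): for a vec_construct
   of a four-element vector such as V4SI, TYPE_VECTOR_SUBPARTS is 4, so the
   cost returned is 4 / 2 + 1 = 3; all the other cases simply read the
   per-tune cost table in current_tune->vec_costs.  */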
11728 /* Implement targetm.vectorize.add_stmt_cost. */
11730 static unsigned
11731 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11732 struct _stmt_vec_info *stmt_info, int misalign,
11733 enum vect_cost_model_location where)
11735 unsigned *cost = (unsigned *) data;
11736 unsigned retval = 0;
11738 if (flag_vect_cost_model)
11740 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11741 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11743 /* Statements in an inner loop relative to the loop being
11744 vectorized are weighted more heavily. The value here is
11745 arbitrary and could potentially be improved with analysis. */
11746 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11747 count *= 50; /* FIXME. */
11749 retval = (unsigned) (count * stmt_cost);
11750 cost[where] += retval;
11753 return retval;
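/* Illustrative sketch (the per-tune cost of 1 is an assumed value): a
   vector_stmt with COUNT == 1 inside the body of an inner loop would be
   costed as 1 * 1 * 50 = 50 and accumulated into cost[vect_body]; the same
   statement outside an inner loop would contribute just 1.  */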
11756 /* Return true if and only if this insn can dual-issue only as older. */
11757 static bool
11758 cortexa7_older_only (rtx_insn *insn)
11760 if (recog_memoized (insn) < 0)
11761 return false;
11763 switch (get_attr_type (insn))
11765 case TYPE_ALU_DSP_REG:
11766 case TYPE_ALU_SREG:
11767 case TYPE_ALUS_SREG:
11768 case TYPE_LOGIC_REG:
11769 case TYPE_LOGICS_REG:
11770 case TYPE_ADC_REG:
11771 case TYPE_ADCS_REG:
11772 case TYPE_ADR:
11773 case TYPE_BFM:
11774 case TYPE_REV:
11775 case TYPE_MVN_REG:
11776 case TYPE_SHIFT_IMM:
11777 case TYPE_SHIFT_REG:
11778 case TYPE_LOAD_BYTE:
11779 case TYPE_LOAD1:
11780 case TYPE_STORE1:
11781 case TYPE_FFARITHS:
11782 case TYPE_FADDS:
11783 case TYPE_FFARITHD:
11784 case TYPE_FADDD:
11785 case TYPE_FMOV:
11786 case TYPE_F_CVT:
11787 case TYPE_FCMPS:
11788 case TYPE_FCMPD:
11789 case TYPE_FCONSTS:
11790 case TYPE_FCONSTD:
11791 case TYPE_FMULS:
11792 case TYPE_FMACS:
11793 case TYPE_FMULD:
11794 case TYPE_FMACD:
11795 case TYPE_FDIVS:
11796 case TYPE_FDIVD:
11797 case TYPE_F_MRC:
11798 case TYPE_F_MRRC:
11799 case TYPE_F_FLAG:
11800 case TYPE_F_LOADS:
11801 case TYPE_F_STORES:
11802 return true;
11803 default:
11804 return false;
11808 /* Return true if and only if this insn can dual-issue as younger. */
11809 static bool
11810 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11812 if (recog_memoized (insn) < 0)
11814 if (verbose > 5)
11815 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11816 return false;
11819 switch (get_attr_type (insn))
11821 case TYPE_ALU_IMM:
11822 case TYPE_ALUS_IMM:
11823 case TYPE_LOGIC_IMM:
11824 case TYPE_LOGICS_IMM:
11825 case TYPE_EXTEND:
11826 case TYPE_MVN_IMM:
11827 case TYPE_MOV_IMM:
11828 case TYPE_MOV_REG:
11829 case TYPE_MOV_SHIFT:
11830 case TYPE_MOV_SHIFT_REG:
11831 case TYPE_BRANCH:
11832 case TYPE_CALL:
11833 return true;
11834 default:
11835 return false;
11840 /* Look for an instruction that can dual issue only as an older
11841 instruction, and move it in front of any instructions that can
11842 dual-issue as younger, while preserving the relative order of all
11843 other instructions in the ready list. This is a heuristic to help
11844 dual-issue in later cycles, by postponing issue of more flexible
11845 instructions. This heuristic may affect dual issue opportunities
11846 in the current cycle. */
11847 static void
11848 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11849 int *n_readyp, int clock)
11851 int i;
11852 int first_older_only = -1, first_younger = -1;
11854 if (verbose > 5)
11855 fprintf (file,
11856 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11857 clock,
11858 *n_readyp);
11860 /* Traverse the ready list from the head (the instruction to issue
11861 first), looking for the first instruction that can issue as
11862 younger and the first instruction that can dual-issue only as
11863 older. */
11864 for (i = *n_readyp - 1; i >= 0; i--)
11866 rtx_insn *insn = ready[i];
11867 if (cortexa7_older_only (insn))
11869 first_older_only = i;
11870 if (verbose > 5)
11871 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11872 break;
11874 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11875 first_younger = i;
11878 /* Nothing to reorder because either no younger insn found or insn
11879 that can dual-issue only as older appears before any insn that
11880 can dual-issue as younger. */
11881 if (first_younger == -1)
11883 if (verbose > 5)
11884 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11885 return;
11888 /* Nothing to reorder because no older-only insn in the ready list. */
11889 if (first_older_only == -1)
11891 if (verbose > 5)
11892 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11893 return;
11896 /* Move first_older_only insn before first_younger. */
11897 if (verbose > 5)
11898 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11899 INSN_UID(ready [first_older_only]),
11900 INSN_UID(ready [first_younger]));
11901 rtx_insn *first_older_only_insn = ready [first_older_only];
11902 for (i = first_older_only; i < first_younger; i++)
11904 ready[i] = ready[i+1];
11907 ready[i] = first_older_only_insn;
11908 return;
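/* Illustrative sketch (instructions assumed for the example): with the
   ready list holding, head first, { mov r0, #1 ; ldr r2, [r3] }, the mov
   (TYPE_MOV_IMM) is "younger" and the ldr (TYPE_LOAD1) is "older only",
   so the ldr is moved to the head and issues before the mov, leaving the
   mov available to pair as the younger insn in a later cycle.  */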
11911 /* Implement TARGET_SCHED_REORDER. */
11912 static int
11913 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11914 int clock)
11916 switch (arm_tune)
11918 case cortexa7:
11919 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11920 break;
11921 default:
11922 /* Do nothing for other cores. */
11923 break;
11926 return arm_issue_rate ();
11929 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11930 It corrects the value of COST based on the relationship between
11931 INSN and DEP through the dependence LINK. It returns the new
11932 value. There is a per-core adjust_cost hook to adjust scheduler costs
11933 and the per-core hook can choose to completely override the generic
11934 adjust_cost function. Only put bits of code into arm_adjust_cost that
11935 are common across all cores. */
11936 static int
11937 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11939 rtx i_pat, d_pat;
11941 /* When generating Thumb-1 code, we want to place flag-setting operations
11942 close to a conditional branch which depends on them, so that we can
11943 omit the comparison. */
11944 if (TARGET_THUMB1
11945 && REG_NOTE_KIND (link) == 0
11946 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11947 && recog_memoized (dep) >= 0
11948 && get_attr_conds (dep) == CONDS_SET)
11949 return 0;
11951 if (current_tune->sched_adjust_cost != NULL)
11953 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11954 return cost;
11957 /* XXX Is this strictly true? */
11958 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11959 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11960 return 0;
11962 /* Call insns don't incur a stall, even if they follow a load. */
11963 if (REG_NOTE_KIND (link) == 0
11964 && CALL_P (insn))
11965 return 1;
11967 if ((i_pat = single_set (insn)) != NULL
11968 && MEM_P (SET_SRC (i_pat))
11969 && (d_pat = single_set (dep)) != NULL
11970 && MEM_P (SET_DEST (d_pat)))
11972 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11973 /* This is a load after a store, there is no conflict if the load reads
11974 from a cached area. Assume that loads from the stack, and from the
11975 constant pool are cached, and that others will miss. This is a
11976 hack. */
11978 if ((GET_CODE (src_mem) == SYMBOL_REF
11979 && CONSTANT_POOL_ADDRESS_P (src_mem))
11980 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11981 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11982 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11983 return 1;
11986 return cost;
11990 arm_max_conditional_execute (void)
11992 return max_insns_skipped;
11995 static int
11996 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11998 if (TARGET_32BIT)
11999 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12000 else
12001 return (optimize > 0) ? 2 : 0;
12004 static int
12005 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12007 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12010 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12011 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12012 sequences of non-executed instructions in IT blocks probably take the same
12013 amount of time as executed instructions (and the IT instruction itself takes
12014 space in icache). This function was experimentally determined to give good
12015 results on a popular embedded benchmark. */
12017 static int
12018 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12020 return (TARGET_32BIT && speed_p) ? 1
12021 : arm_default_branch_cost (speed_p, predictable_p);
12024 static bool fp_consts_inited = false;
12026 static REAL_VALUE_TYPE value_fp0;
12028 static void
12029 init_fp_table (void)
12031 REAL_VALUE_TYPE r;
12033 r = REAL_VALUE_ATOF ("0", DFmode);
12034 value_fp0 = r;
12035 fp_consts_inited = true;
12038 /* Return TRUE if rtx X is a valid immediate FP constant. */
12040 arm_const_double_rtx (rtx x)
12042 REAL_VALUE_TYPE r;
12044 if (!fp_consts_inited)
12045 init_fp_table ();
12047 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12048 if (REAL_VALUE_MINUS_ZERO (r))
12049 return 0;
12051 if (REAL_VALUES_EQUAL (r, value_fp0))
12052 return 1;
12054 return 0;
12057 /* VFPv3 has a fairly wide range of representable immediates, formed from
12058 "quarter-precision" floating-point values. These can be evaluated using this
12059 formula (with ^ for exponentiation):
12061 -1^s * n * 2^-r
12063 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12064 16 <= n <= 31 and 0 <= r <= 7.
12066 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12068 - A (most-significant) is the sign bit.
12069 - BCD are the exponent (encoded as r XOR 3).
12070 - EFGH are the mantissa (encoded as n - 16).
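/* Worked example of the encoding above: 1.0 = +16 * 2^-4, so s = 0, n = 16
   and r = 4, giving ABCDEFGH = 0 111 0000 (r XOR 3 = 7, n - 16 = 0), i.e.
   the index 0x70 returned by vfp3_const_double_index below.  */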
12073 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12074 fconst[sd] instruction, or -1 if X isn't suitable. */
12075 static int
12076 vfp3_const_double_index (rtx x)
12078 REAL_VALUE_TYPE r, m;
12079 int sign, exponent;
12080 unsigned HOST_WIDE_INT mantissa, mant_hi;
12081 unsigned HOST_WIDE_INT mask;
12082 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12083 bool fail;
12085 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12086 return -1;
12088 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12090 /* We can't represent these things, so detect them first. */
12091 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12092 return -1;
12094 /* Extract sign, exponent and mantissa. */
12095 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12096 r = real_value_abs (&r);
12097 exponent = REAL_EXP (&r);
12098 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12099 highest (sign) bit, with a fixed binary point at bit point_pos.
12100 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12101 bits for the mantissa, this may fail (low bits would be lost). */
12102 real_ldexp (&m, &r, point_pos - exponent);
12103 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12104 mantissa = w.elt (0);
12105 mant_hi = w.elt (1);
12107 /* If there are bits set in the low part of the mantissa, we can't
12108 represent this value. */
12109 if (mantissa != 0)
12110 return -1;
12112 /* Now make it so that mantissa contains the most-significant bits, and move
12113 the point_pos to indicate that the least-significant bits have been
12114 discarded. */
12115 point_pos -= HOST_BITS_PER_WIDE_INT;
12116 mantissa = mant_hi;
12118 /* We can permit four significant bits of mantissa only, plus a high bit
12119 which is always 1. */
12120 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12121 if ((mantissa & mask) != 0)
12122 return -1;
12124 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12125 mantissa >>= point_pos - 5;
12127 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12128 floating-point immediate zero with Neon using an integer-zero load, but
12129 that case is handled elsewhere.) */
12130 if (mantissa == 0)
12131 return -1;
12133 gcc_assert (mantissa >= 16 && mantissa <= 31);
12135 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12136 normalized significands are in the range [1, 2). (Our mantissa is shifted
12137 left 4 places at this point relative to normalized IEEE754 values). GCC
12138 internally uses [0.5, 1) (see real.c), so the exponent returned from
12139 REAL_EXP must be altered. */
12140 exponent = 5 - exponent;
12142 if (exponent < 0 || exponent > 7)
12143 return -1;
12145 /* Sign, mantissa and exponent are now in the correct form to plug into the
12146 formula described in the comment above. */
12147 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12150 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12152 vfp3_const_double_rtx (rtx x)
12154 if (!TARGET_VFP3)
12155 return 0;
12157 return vfp3_const_double_index (x) != -1;
12160 /* Recognize immediates which can be used in various Neon instructions. Legal
12161 immediates are described by the following table (for VMVN variants, the
12162 bitwise inverse of the constant shown is recognized. In either case, VMOV
12163 is output and the correct instruction to use for a given constant is chosen
12164 by the assembler). The constant shown is replicated across all elements of
12165 the destination vector.
12167 insn elems variant constant (binary)
12168 ---- ----- ------- -----------------
12169 vmov i32 0 00000000 00000000 00000000 abcdefgh
12170 vmov i32 1 00000000 00000000 abcdefgh 00000000
12171 vmov i32 2 00000000 abcdefgh 00000000 00000000
12172 vmov i32 3 abcdefgh 00000000 00000000 00000000
12173 vmov i16 4 00000000 abcdefgh
12174 vmov i16 5 abcdefgh 00000000
12175 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12176 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12177 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12178 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12179 vmvn i16 10 00000000 abcdefgh
12180 vmvn i16 11 abcdefgh 00000000
12181 vmov i32 12 00000000 00000000 abcdefgh 11111111
12182 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12183 vmov i32 14 00000000 abcdefgh 11111111 11111111
12184 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12185 vmov i8 16 abcdefgh
12186 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12187 eeeeeeee ffffffff gggggggg hhhhhhhh
12188 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12189 vmov f32 19 00000000 00000000 00000000 00000000
12191 For case 18, B = !b. Representable values are exactly those accepted by
12192 vfp3_const_double_index, but are output as floating-point numbers rather
12193 than indices.
12195 For case 19, we will change it to vmov.i32 when assembling.
12197 Variants 0-5 (inclusive) may also be used as immediates for the second
12198 operand of VORR/VBIC instructions.
12200 The INVERSE argument causes the bitwise inverse of the given operand to be
12201 recognized instead (used for recognizing legal immediates for the VAND/VORN
12202 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12203 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12204 output, rather than the real insns vbic/vorr).
12206 INVERSE makes no difference to the recognition of float vectors.
12208 The return value is the variant of immediate as shown in the above table, or
12209 -1 if the given value doesn't match any of the listed patterns.
12211 static int
12212 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12213 rtx *modconst, int *elementwidth)
12215 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12216 matches = 1; \
12217 for (i = 0; i < idx; i += (STRIDE)) \
12218 if (!(TEST)) \
12219 matches = 0; \
12220 if (matches) \
12222 immtype = (CLASS); \
12223 elsize = (ELSIZE); \
12224 break; \
12227 unsigned int i, elsize = 0, idx = 0, n_elts;
12228 unsigned int innersize;
12229 unsigned char bytes[16];
12230 int immtype = -1, matches;
12231 unsigned int invmask = inverse ? 0xff : 0;
12232 bool vector = GET_CODE (op) == CONST_VECTOR;
12234 if (vector)
12236 n_elts = CONST_VECTOR_NUNITS (op);
12237 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12239 else
12241 n_elts = 1;
12242 if (mode == VOIDmode)
12243 mode = DImode;
12244 innersize = GET_MODE_SIZE (mode);
12247 /* Vectors of float constants. */
12248 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12250 rtx el0 = CONST_VECTOR_ELT (op, 0);
12251 REAL_VALUE_TYPE r0;
12253 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12254 return -1;
12256 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12258 for (i = 1; i < n_elts; i++)
12260 rtx elt = CONST_VECTOR_ELT (op, i);
12261 REAL_VALUE_TYPE re;
12263 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12265 if (!REAL_VALUES_EQUAL (r0, re))
12266 return -1;
12269 if (modconst)
12270 *modconst = CONST_VECTOR_ELT (op, 0);
12272 if (elementwidth)
12273 *elementwidth = 0;
12275 if (el0 == CONST0_RTX (GET_MODE (el0)))
12276 return 19;
12277 else
12278 return 18;
12281 /* Splat vector constant out into a byte vector. */
12282 for (i = 0; i < n_elts; i++)
12284 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12285 unsigned HOST_WIDE_INT elpart;
12286 unsigned int part, parts;
12288 if (CONST_INT_P (el))
12290 elpart = INTVAL (el);
12291 parts = 1;
12293 else if (CONST_DOUBLE_P (el))
12295 elpart = CONST_DOUBLE_LOW (el);
12296 parts = 2;
12298 else
12299 gcc_unreachable ();
12301 for (part = 0; part < parts; part++)
12303 unsigned int byte;
12304 for (byte = 0; byte < innersize; byte++)
12306 bytes[idx++] = (elpart & 0xff) ^ invmask;
12307 elpart >>= BITS_PER_UNIT;
12309 if (CONST_DOUBLE_P (el))
12310 elpart = CONST_DOUBLE_HIGH (el);
12314 /* Sanity check. */
12315 gcc_assert (idx == GET_MODE_SIZE (mode));
12319 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12320 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12322 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12323 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12325 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12326 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12328 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12329 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12331 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12333 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12335 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12336 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12338 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12339 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12341 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12342 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12344 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12345 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12347 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12349 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12351 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12352 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12354 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12355 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12357 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12358 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12360 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12361 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12363 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12365 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12366 && bytes[i] == bytes[(i + 8) % idx]);
12368 while (0);
12370 if (immtype == -1)
12371 return -1;
12373 if (elementwidth)
12374 *elementwidth = elsize;
12376 if (modconst)
12378 unsigned HOST_WIDE_INT imm = 0;
12380 /* Un-invert bytes of recognized vector, if necessary. */
12381 if (invmask != 0)
12382 for (i = 0; i < idx; i++)
12383 bytes[i] ^= invmask;
12385 if (immtype == 17)
12387 /* FIXME: Broken on 32-bit H_W_I hosts. */
12388 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12390 for (i = 0; i < 8; i++)
12391 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12392 << (i * BITS_PER_UNIT);
12394 *modconst = GEN_INT (imm);
12396 else
12398 unsigned HOST_WIDE_INT imm = 0;
12400 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12401 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12403 *modconst = GEN_INT (imm);
12407 return immtype;
12408 #undef CHECK
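/* Worked example for the table above (constant chosen for illustration):
   a V4SImode vector with every element equal to 0x00ab0000 splats to the
   byte pattern 00 00 ab 00 per element, which matches variant 2
   ("vmov i32"), so the function returns 2 with *ELEMENTWIDTH = 32 and
   *MODCONST = 0x00ab0000.  */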
12411 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12412 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12413 float elements), and a modified constant (whatever should be output for a
12414 VMOV) in *MODCONST. */
12417 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12418 rtx *modconst, int *elementwidth)
12420 rtx tmpconst;
12421 int tmpwidth;
12422 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12424 if (retval == -1)
12425 return 0;
12427 if (modconst)
12428 *modconst = tmpconst;
12430 if (elementwidth)
12431 *elementwidth = tmpwidth;
12433 return 1;
12436 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12437 the immediate is valid, write a constant suitable for using as an operand
12438 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12439 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12442 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12443 rtx *modconst, int *elementwidth)
12445 rtx tmpconst;
12446 int tmpwidth;
12447 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12449 if (retval < 0 || retval > 5)
12450 return 0;
12452 if (modconst)
12453 *modconst = tmpconst;
12455 if (elementwidth)
12456 *elementwidth = tmpwidth;
12458 return 1;
12461 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12462 the immediate is valid, write a constant suitable for using as an operand
12463 to VSHR/VSHL to *MODCONST and the corresponding element width to
12464 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12465 which have different immediate ranges.
12468 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12469 rtx *modconst, int *elementwidth,
12470 bool isleftshift)
12472 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12473 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12474 unsigned HOST_WIDE_INT last_elt = 0;
12475 unsigned HOST_WIDE_INT maxshift;
12477 /* Split vector constant out into a byte vector. */
12478 for (i = 0; i < n_elts; i++)
12480 rtx el = CONST_VECTOR_ELT (op, i);
12481 unsigned HOST_WIDE_INT elpart;
12483 if (CONST_INT_P (el))
12484 elpart = INTVAL (el);
12485 else if (CONST_DOUBLE_P (el))
12486 return 0;
12487 else
12488 gcc_unreachable ();
12490 if (i != 0 && elpart != last_elt)
12491 return 0;
12493 last_elt = elpart;
12496 /* The element size in bits bounds the shift immediate. */
12497 maxshift = innersize * 8;
12499 if (isleftshift)
12501 /* Left shift immediate value can be from 0 to <size>-1. */
12502 if (last_elt >= maxshift)
12503 return 0;
12505 else
12507 /* Right shift immediate value can be from 1 to <size>. */
12508 if (last_elt == 0 || last_elt > maxshift)
12509 return 0;
12512 if (elementwidth)
12513 *elementwidth = innersize * 8;
12515 if (modconst)
12516 *modconst = CONST_VECTOR_ELT (op, 0);
12518 return 1;
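/* Worked example: for a vector of 16-bit elements (e.g. V8HImode),
   MAXSHIFT is 16, so a left-shift immediate must be 0..15 while a
   right-shift immediate must be 1..16, matching the ranges noted in the
   comments above.  */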
12521 /* Return a string suitable for output of Neon immediate logic operation
12522 MNEM. */
12524 char *
12525 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12526 int inverse, int quad)
12528 int width, is_valid;
12529 static char templ[40];
12531 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12533 gcc_assert (is_valid != 0);
12535 if (quad)
12536 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12537 else
12538 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12540 return templ;
12543 /* Return a string suitable for output of Neon immediate shift operation
12544 (VSHR or VSHL) MNEM. */
12546 char *
12547 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12548 machine_mode mode, int quad,
12549 bool isleftshift)
12551 int width, is_valid;
12552 static char templ[40];
12554 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12555 gcc_assert (is_valid != 0);
12557 if (quad)
12558 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12559 else
12560 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12562 return templ;
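/* Example of the template produced (operands assumed): called with MNEM
   "vshr", SIGN 's', a 16-bit element width and QUAD set, the buffer holds
   "vshr.s16\t%q0, %q1, %2", which the output machinery then fills in with
   the actual register operands.  */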
12565 /* Output a sequence of pairwise operations to implement a reduction.
12566 NOTE: We do "too much work" here, because pairwise operations work on two
12567 registers' worth of operands in one go. Unfortunately we can't exploit those
12568 extra calculations to do the full operation in fewer steps, as far as I can tell.
12569 Although all vector elements of the result but the first are ignored, we
12570 actually calculate the same result in each of the elements. An alternative
12571 such as initially loading a vector with zero to use as each of the second
12572 operands would use up an additional register and take an extra instruction,
12573 for no particular gain. */
12575 void
12576 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12577 rtx (*reduc) (rtx, rtx, rtx))
12579 machine_mode inner = GET_MODE_INNER (mode);
12580 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12581 rtx tmpsum = op1;
12583 for (i = parts / 2; i >= 1; i /= 2)
12585 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12586 emit_insn (reduc (dest, tmpsum, tmpsum));
12587 tmpsum = dest;
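/* Illustrative sketch: reducing a four-element vector (PARTS == 4) runs the
   loop with i = 2 and then i = 1, emitting two pairwise operations; only
   the final one targets OP0, the intermediate result living in a fresh
   pseudo.  For a V4SF sum this would be two vpadd.f32-style instructions,
   assuming REDUC generates the pairwise-add pattern.  */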
12591 /* If VALS is a vector constant that can be loaded into a register
12592 using VDUP, generate instructions to do so and return an RTX to
12593 assign to the register. Otherwise return NULL_RTX. */
12595 static rtx
12596 neon_vdup_constant (rtx vals)
12598 machine_mode mode = GET_MODE (vals);
12599 machine_mode inner_mode = GET_MODE_INNER (mode);
12600 int n_elts = GET_MODE_NUNITS (mode);
12601 bool all_same = true;
12602 rtx x;
12603 int i;
12605 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12606 return NULL_RTX;
12608 for (i = 0; i < n_elts; ++i)
12610 x = XVECEXP (vals, 0, i);
12611 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12612 all_same = false;
12615 if (!all_same)
12616 /* The elements are not all the same. We could handle repeating
12617 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12618 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12619 vdup.i16). */
12620 return NULL_RTX;
12622 /* We can load this constant by using VDUP and a constant in a
12623 single ARM register. This will be cheaper than a vector
12624 load. */
12626 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12627 return gen_rtx_VEC_DUPLICATE (mode, x);
12630 /* Generate code to load VALS, which is a PARALLEL containing only
12631 constants (for vec_init) or CONST_VECTOR, efficiently into a
12632 register. Returns an RTX to copy into the register, or NULL_RTX
12633 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12636 neon_make_constant (rtx vals)
12638 machine_mode mode = GET_MODE (vals);
12639 rtx target;
12640 rtx const_vec = NULL_RTX;
12641 int n_elts = GET_MODE_NUNITS (mode);
12642 int n_const = 0;
12643 int i;
12645 if (GET_CODE (vals) == CONST_VECTOR)
12646 const_vec = vals;
12647 else if (GET_CODE (vals) == PARALLEL)
12649 /* A CONST_VECTOR must contain only CONST_INTs and
12650 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12651 Only store valid constants in a CONST_VECTOR. */
12652 for (i = 0; i < n_elts; ++i)
12654 rtx x = XVECEXP (vals, 0, i);
12655 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12656 n_const++;
12658 if (n_const == n_elts)
12659 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12661 else
12662 gcc_unreachable ();
12664 if (const_vec != NULL
12665 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12666 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12667 return const_vec;
12668 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12669 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12670 pipeline cycle; creating the constant takes one or two ARM
12671 pipeline cycles. */
12672 return target;
12673 else if (const_vec != NULL_RTX)
12674 /* Load from constant pool. On Cortex-A8 this takes two cycles
12675 (for either double or quad vectors). We can not take advantage
12676 of single-cycle VLD1 because we need a PC-relative addressing
12677 mode. */
12678 return const_vec;
12679 else
12680 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12681 We can not construct an initializer. */
12682 return NULL_RTX;
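/* Illustrative examples of the three outcomes (constants assumed): a V4SI
   vector of four 5s is a valid VMOV immediate and is returned as a
   CONST_VECTOR; four copies of 0x12345678 fail the immediate test but are
   all equal, so neon_vdup_constant returns a VEC_DUPLICATE of a core
   register; four unrelated large constants fall back to the constant
   pool.  */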
12685 /* Initialize vector TARGET to VALS. */
12687 void
12688 neon_expand_vector_init (rtx target, rtx vals)
12690 machine_mode mode = GET_MODE (target);
12691 machine_mode inner_mode = GET_MODE_INNER (mode);
12692 int n_elts = GET_MODE_NUNITS (mode);
12693 int n_var = 0, one_var = -1;
12694 bool all_same = true;
12695 rtx x, mem;
12696 int i;
12698 for (i = 0; i < n_elts; ++i)
12700 x = XVECEXP (vals, 0, i);
12701 if (!CONSTANT_P (x))
12702 ++n_var, one_var = i;
12704 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12705 all_same = false;
12708 if (n_var == 0)
12710 rtx constant = neon_make_constant (vals);
12711 if (constant != NULL_RTX)
12713 emit_move_insn (target, constant);
12714 return;
12718 /* Splat a single non-constant element if we can. */
12719 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12721 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12722 emit_insn (gen_rtx_SET (VOIDmode, target,
12723 gen_rtx_VEC_DUPLICATE (mode, x)));
12724 return;
12727 /* One field is non-constant. Load constant then overwrite varying
12728 field. This is more efficient than using the stack. */
12729 if (n_var == 1)
12731 rtx copy = copy_rtx (vals);
12732 rtx index = GEN_INT (one_var);
12734 /* Load constant part of vector, substitute neighboring value for
12735 varying element. */
12736 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12737 neon_expand_vector_init (target, copy);
12739 /* Insert variable. */
12740 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12741 switch (mode)
12743 case V8QImode:
12744 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12745 break;
12746 case V16QImode:
12747 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12748 break;
12749 case V4HImode:
12750 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12751 break;
12752 case V8HImode:
12753 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12754 break;
12755 case V2SImode:
12756 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12757 break;
12758 case V4SImode:
12759 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12760 break;
12761 case V2SFmode:
12762 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12763 break;
12764 case V4SFmode:
12765 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12766 break;
12767 case V2DImode:
12768 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12769 break;
12770 default:
12771 gcc_unreachable ();
12773 return;
12776 /* Construct the vector in memory one field at a time
12777 and load the whole vector. */
12778 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12779 for (i = 0; i < n_elts; i++)
12780 emit_move_insn (adjust_address_nv (mem, inner_mode,
12781 i * GET_MODE_SIZE (inner_mode)),
12782 XVECEXP (vals, 0, i));
12783 emit_move_insn (target, mem);
12786 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12787 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12788 reported source locations are bogus. */
12790 static void
12791 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12792 const char *err)
12794 HOST_WIDE_INT lane;
12796 gcc_assert (CONST_INT_P (operand));
12798 lane = INTVAL (operand);
12800 if (lane < low || lane >= high)
12801 error (err);
12804 /* Bounds-check lanes. */
12806 void
12807 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12809 bounds_check (operand, low, high, "lane out of range");
12812 /* Bounds-check constants. */
12814 void
12815 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12817 bounds_check (operand, low, high, "constant out of range");
12820 HOST_WIDE_INT
12821 neon_element_bits (machine_mode mode)
12823 if (mode == DImode)
12824 return GET_MODE_BITSIZE (mode);
12825 else
12826 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12830 /* Predicates for `match_operand' and `match_operator'. */
12832 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12833 WB is true if full writeback address modes are allowed and is false
12834 if limited writeback address modes (POST_INC and PRE_DEC) are
12835 allowed. */
12838 arm_coproc_mem_operand (rtx op, bool wb)
12840 rtx ind;
12842 /* Reject eliminable registers. */
12843 if (! (reload_in_progress || reload_completed || lra_in_progress)
12844 && ( reg_mentioned_p (frame_pointer_rtx, op)
12845 || reg_mentioned_p (arg_pointer_rtx, op)
12846 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12847 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12848 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12849 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12850 return FALSE;
12852 /* Constants are converted into offsets from labels. */
12853 if (!MEM_P (op))
12854 return FALSE;
12856 ind = XEXP (op, 0);
12858 if (reload_completed
12859 && (GET_CODE (ind) == LABEL_REF
12860 || (GET_CODE (ind) == CONST
12861 && GET_CODE (XEXP (ind, 0)) == PLUS
12862 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12863 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12864 return TRUE;
12866 /* Match: (mem (reg)). */
12867 if (REG_P (ind))
12868 return arm_address_register_rtx_p (ind, 0);
12870 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12871 acceptable in any case (subject to verification by
12872 arm_address_register_rtx_p). We need WB to be true to accept
12873 PRE_INC and POST_DEC. */
12874 if (GET_CODE (ind) == POST_INC
12875 || GET_CODE (ind) == PRE_DEC
12876 || (wb
12877 && (GET_CODE (ind) == PRE_INC
12878 || GET_CODE (ind) == POST_DEC)))
12879 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12881 if (wb
12882 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12883 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12884 && GET_CODE (XEXP (ind, 1)) == PLUS
12885 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12886 ind = XEXP (ind, 1);
12888 /* Match:
12889 (plus (reg)
12890 (const)). */
12891 if (GET_CODE (ind) == PLUS
12892 && REG_P (XEXP (ind, 0))
12893 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12894 && CONST_INT_P (XEXP (ind, 1))
12895 && INTVAL (XEXP (ind, 1)) > -1024
12896 && INTVAL (XEXP (ind, 1)) < 1024
12897 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12898 return TRUE;
12900 return FALSE;
12903 /* Return TRUE if OP is a memory operand which we can load or store a vector
12904 to/from. TYPE is one of the following values:
12905 0 - Vector load/store (vldr)
12906 1 - Core registers (ldm)
12907 2 - Element/structure loads (vld1)
12910 neon_vector_mem_operand (rtx op, int type, bool strict)
12912 rtx ind;
12914 /* Reject eliminable registers. */
12915 if (! (reload_in_progress || reload_completed)
12916 && ( reg_mentioned_p (frame_pointer_rtx, op)
12917 || reg_mentioned_p (arg_pointer_rtx, op)
12918 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12919 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12920 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12921 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12922 return !strict;
12924 /* Constants are converted into offsets from labels. */
12925 if (!MEM_P (op))
12926 return FALSE;
12928 ind = XEXP (op, 0);
12930 if (reload_completed
12931 && (GET_CODE (ind) == LABEL_REF
12932 || (GET_CODE (ind) == CONST
12933 && GET_CODE (XEXP (ind, 0)) == PLUS
12934 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12935 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12936 return TRUE;
12938 /* Match: (mem (reg)). */
12939 if (REG_P (ind))
12940 return arm_address_register_rtx_p (ind, 0);
12942 /* Allow post-increment with Neon registers. */
12943 if ((type != 1 && GET_CODE (ind) == POST_INC)
12944 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12945 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12947 /* Allow post-increment by register for VLDn */
12948 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12949 && GET_CODE (XEXP (ind, 1)) == PLUS
12950 && REG_P (XEXP (XEXP (ind, 1), 1)))
12951 return true;
12953 /* Match:
12954 (plus (reg)
12955 (const)). */
12956 if (type == 0
12957 && GET_CODE (ind) == PLUS
12958 && REG_P (XEXP (ind, 0))
12959 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12960 && CONST_INT_P (XEXP (ind, 1))
12961 && INTVAL (XEXP (ind, 1)) > -1024
12962 /* For quad modes, we restrict the constant offset to be slightly less
12963 than what the instruction format permits. We have no such constraint
12964 on double mode offsets. (This must match arm_legitimate_index_p.) */
12965 && (INTVAL (XEXP (ind, 1))
12966 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12967 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12968 return TRUE;
12970 return FALSE;
12973 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12974 type. */
12976 neon_struct_mem_operand (rtx op)
12978 rtx ind;
12980 /* Reject eliminable registers. */
12981 if (! (reload_in_progress || reload_completed)
12982 && ( reg_mentioned_p (frame_pointer_rtx, op)
12983 || reg_mentioned_p (arg_pointer_rtx, op)
12984 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12985 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12986 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12987 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12988 return FALSE;
12990 /* Constants are converted into offsets from labels. */
12991 if (!MEM_P (op))
12992 return FALSE;
12994 ind = XEXP (op, 0);
12996 if (reload_completed
12997 && (GET_CODE (ind) == LABEL_REF
12998 || (GET_CODE (ind) == CONST
12999 && GET_CODE (XEXP (ind, 0)) == PLUS
13000 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13001 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13002 return TRUE;
13004 /* Match: (mem (reg)). */
13005 if (REG_P (ind))
13006 return arm_address_register_rtx_p (ind, 0);
13008 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13009 if (GET_CODE (ind) == POST_INC
13010 || GET_CODE (ind) == PRE_DEC)
13011 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13013 return FALSE;
13016 /* Return true if X is a register that will be eliminated later on. */
13018 arm_eliminable_register (rtx x)
13020 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13021 || REGNO (x) == ARG_POINTER_REGNUM
13022 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13023 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13026 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
13027 coprocessor registers. Otherwise return NO_REGS. */
13029 enum reg_class
13030 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13032 if (mode == HFmode)
13034 if (!TARGET_NEON_FP16)
13035 return GENERAL_REGS;
13036 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13037 return NO_REGS;
13038 return GENERAL_REGS;
13041 /* The neon move patterns handle all legitimate vector and struct
13042 addresses. */
13043 if (TARGET_NEON
13044 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13045 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13046 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13047 || VALID_NEON_STRUCT_MODE (mode)))
13048 return NO_REGS;
13050 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13051 return NO_REGS;
13053 return GENERAL_REGS;
13056 /* Values which must be returned in the most-significant end of the return
13057 register. */
13059 static bool
13060 arm_return_in_msb (const_tree valtype)
13062 return (TARGET_AAPCS_BASED
13063 && BYTES_BIG_ENDIAN
13064 && (AGGREGATE_TYPE_P (valtype)
13065 || TREE_CODE (valtype) == COMPLEX_TYPE
13066 || FIXED_POINT_TYPE_P (valtype)));
13069 /* Return TRUE if X references a SYMBOL_REF. */
13071 symbol_mentioned_p (rtx x)
13073 const char * fmt;
13074 int i;
13076 if (GET_CODE (x) == SYMBOL_REF)
13077 return 1;
13079 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13080 are constant offsets, not symbols. */
13081 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13082 return 0;
13084 fmt = GET_RTX_FORMAT (GET_CODE (x));
13086 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13088 if (fmt[i] == 'E')
13090 int j;
13092 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13093 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13094 return 1;
13096 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13097 return 1;
13100 return 0;
13103 /* Return TRUE if X references a LABEL_REF. */
13105 label_mentioned_p (rtx x)
13107 const char * fmt;
13108 int i;
13110 if (GET_CODE (x) == LABEL_REF)
13111 return 1;
13113 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13114 instruction, but they are constant offsets, not symbols. */
13115 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13116 return 0;
13118 fmt = GET_RTX_FORMAT (GET_CODE (x));
13119 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13121 if (fmt[i] == 'E')
13123 int j;
13125 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13126 if (label_mentioned_p (XVECEXP (x, i, j)))
13127 return 1;
13129 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13130 return 1;
13133 return 0;
13137 tls_mentioned_p (rtx x)
13139 switch (GET_CODE (x))
13141 case CONST:
13142 return tls_mentioned_p (XEXP (x, 0));
13144 case UNSPEC:
13145 if (XINT (x, 1) == UNSPEC_TLS)
13146 return 1;
13148 default:
13149 return 0;
13153 /* Must not copy any rtx that uses a pc-relative address. */
13155 static bool
13156 arm_cannot_copy_insn_p (rtx_insn *insn)
13158 /* The tls call insn cannot be copied, as it is paired with a data
13159 word. */
13160 if (recog_memoized (insn) == CODE_FOR_tlscall)
13161 return true;
13163 subrtx_iterator::array_type array;
13164 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13166 const_rtx x = *iter;
13167 if (GET_CODE (x) == UNSPEC
13168 && (XINT (x, 1) == UNSPEC_PIC_BASE
13169 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13170 return true;
13172 return false;
13175 enum rtx_code
13176 minmax_code (rtx x)
13178 enum rtx_code code = GET_CODE (x);
13180 switch (code)
13182 case SMAX:
13183 return GE;
13184 case SMIN:
13185 return LE;
13186 case UMIN:
13187 return LEU;
13188 case UMAX:
13189 return GEU;
13190 default:
13191 gcc_unreachable ();
13195 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13197 bool
13198 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13199 int *mask, bool *signed_sat)
13201 /* The high bound must be a power of two minus one. */
13202 int log = exact_log2 (INTVAL (hi_bound) + 1);
13203 if (log == -1)
13204 return false;
13206 /* The low bound is either zero (for usat) or one less than the
13207 negation of the high bound (for ssat). */
13208 if (INTVAL (lo_bound) == 0)
13210 if (mask)
13211 *mask = log;
13212 if (signed_sat)
13213 *signed_sat = false;
13215 return true;
13218 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13220 if (mask)
13221 *mask = log + 1;
13222 if (signed_sat)
13223 *signed_sat = true;
13225 return true;
13228 return false;
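/* Worked examples: bounds [0, 255] give log2 (256) = 8 with a zero low
   bound, so *MASK = 8 and *SIGNED_SAT = false (a usat #8 pattern);
   bounds [-128, 127] give log2 (128) = 7 with -128 == -127 - 1, so
   *MASK = 8 and *SIGNED_SAT = true (an ssat #8 pattern).  */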
13231 /* Return 1 if memory locations are adjacent. */
13233 adjacent_mem_locations (rtx a, rtx b)
13235 /* We don't guarantee to preserve the order of these memory refs. */
13236 if (volatile_refs_p (a) || volatile_refs_p (b))
13237 return 0;
13239 if ((REG_P (XEXP (a, 0))
13240 || (GET_CODE (XEXP (a, 0)) == PLUS
13241 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13242 && (REG_P (XEXP (b, 0))
13243 || (GET_CODE (XEXP (b, 0)) == PLUS
13244 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13246 HOST_WIDE_INT val0 = 0, val1 = 0;
13247 rtx reg0, reg1;
13248 int val_diff;
13250 if (GET_CODE (XEXP (a, 0)) == PLUS)
13252 reg0 = XEXP (XEXP (a, 0), 0);
13253 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13255 else
13256 reg0 = XEXP (a, 0);
13258 if (GET_CODE (XEXP (b, 0)) == PLUS)
13260 reg1 = XEXP (XEXP (b, 0), 0);
13261 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13263 else
13264 reg1 = XEXP (b, 0);
13266 /* Don't accept any offset that will require multiple
13267 instructions to handle, since this would cause the
13268 arith_adjacentmem pattern to output an overlong sequence. */
13269 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13270 return 0;
13272 /* Don't allow an eliminable register: register elimination can make
13273 the offset too large. */
13274 if (arm_eliminable_register (reg0))
13275 return 0;
13277 val_diff = val1 - val0;
13279 if (arm_ld_sched)
13281 /* If the target has load delay slots, then there's no benefit
13282 to using an ldm instruction unless the offset is zero and
13283 we are optimizing for size. */
13284 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13285 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13286 && (val_diff == 4 || val_diff == -4));
13289 return ((REGNO (reg0) == REGNO (reg1))
13290 && (val_diff == 4 || val_diff == -4));
13293 return 0;
13296 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13297 for load operations, false for store operations. CONSECUTIVE is true
13298 if the register numbers in the operation must be consecutive in the register
13299 bank. RETURN_PC is true if value is to be loaded in PC.
13300 The pattern we are trying to match for load is:
13301 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13302 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13305 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13307 where
13308 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13309 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13310 3. If consecutive is TRUE, then for kth register being loaded,
13311 REGNO (R_dk) = REGNO (R_d0) + k.
13312 The pattern for store is similar. */
13313 bool
13314 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13315 bool consecutive, bool return_pc)
13317 HOST_WIDE_INT count = XVECLEN (op, 0);
13318 rtx reg, mem, addr;
13319 unsigned regno;
13320 unsigned first_regno;
13321 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13322 rtx elt;
13323 bool addr_reg_in_reglist = false;
13324 bool update = false;
13325 int reg_increment;
13326 int offset_adj;
13327 int regs_per_val;
13329 /* If not in SImode, then registers must be consecutive
13330 (e.g., VLDM instructions for DFmode). */
13331 gcc_assert ((mode == SImode) || consecutive);
13332 /* Setting return_pc for stores is illegal. */
13333 gcc_assert (!return_pc || load);
13335 /* Set up the increments and the regs per val based on the mode. */
13336 reg_increment = GET_MODE_SIZE (mode);
13337 regs_per_val = reg_increment / 4;
13338 offset_adj = return_pc ? 1 : 0;
13340 if (count <= 1
13341 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13342 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13343 return false;
13345 /* Check if this is a write-back. */
13346 elt = XVECEXP (op, 0, offset_adj);
13347 if (GET_CODE (SET_SRC (elt)) == PLUS)
13349 i++;
13350 base = 1;
13351 update = true;
13353 /* The offset adjustment must be the number of registers being
13354 popped times the size of a single register. */
13355 if (!REG_P (SET_DEST (elt))
13356 || !REG_P (XEXP (SET_SRC (elt), 0))
13357 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13358 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13359 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13360 ((count - 1 - offset_adj) * reg_increment))
13361 return false;
13364 i = i + offset_adj;
13365 base = base + offset_adj;
13366 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13367 success depends on the type: VLDM can do just one reg,
13368 LDM must do at least two. */
13369 if ((count <= i) && (mode == SImode))
13370 return false;
13372 elt = XVECEXP (op, 0, i - 1);
13373 if (GET_CODE (elt) != SET)
13374 return false;
13376 if (load)
13378 reg = SET_DEST (elt);
13379 mem = SET_SRC (elt);
13381 else
13383 reg = SET_SRC (elt);
13384 mem = SET_DEST (elt);
13387 if (!REG_P (reg) || !MEM_P (mem))
13388 return false;
13390 regno = REGNO (reg);
13391 first_regno = regno;
13392 addr = XEXP (mem, 0);
13393 if (GET_CODE (addr) == PLUS)
13395 if (!CONST_INT_P (XEXP (addr, 1)))
13396 return false;
13398 offset = INTVAL (XEXP (addr, 1));
13399 addr = XEXP (addr, 0);
13402 if (!REG_P (addr))
13403 return false;
13405 /* Don't allow SP to be loaded unless it is also the base register. It
13406 guarantees that SP is reset correctly when an LDM instruction
13407 is interrupted. Otherwise, we might end up with a corrupt stack. */
13408 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13409 return false;
13411 for (; i < count; i++)
13413 elt = XVECEXP (op, 0, i);
13414 if (GET_CODE (elt) != SET)
13415 return false;
13417 if (load)
13419 reg = SET_DEST (elt);
13420 mem = SET_SRC (elt);
13422 else
13424 reg = SET_SRC (elt);
13425 mem = SET_DEST (elt);
13428 if (!REG_P (reg)
13429 || GET_MODE (reg) != mode
13430 || REGNO (reg) <= regno
13431 || (consecutive
13432 && (REGNO (reg) !=
13433 (unsigned int) (first_regno + regs_per_val * (i - base))))
13434 /* Don't allow SP to be loaded unless it is also the base register. It
13435 guarantees that SP is reset correctly when an LDM instruction
13436 is interrupted. Otherwise, we might end up with a corrupt stack. */
13437 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13438 || !MEM_P (mem)
13439 || GET_MODE (mem) != mode
13440 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13441 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13442 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13443 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13444 offset + (i - base) * reg_increment))
13445 && (!REG_P (XEXP (mem, 0))
13446 || offset + (i - base) * reg_increment != 0)))
13447 return false;
13449 regno = REGNO (reg);
13450 if (regno == REGNO (addr))
13451 addr_reg_in_reglist = true;
13454 if (load)
13456 if (update && addr_reg_in_reglist)
13457 return false;
13459 /* For Thumb-1, the address register is always modified - either by write-back
13460 or by explicit load. If the pattern does not describe an update,
13461 then the address register must be in the list of loaded registers. */
13462 if (TARGET_THUMB1)
13463 return update || addr_reg_in_reglist;
13466 return true;
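/* Illustrative RTL for the pattern described above (register numbers
   assumed): an ldmia r0, {r4, r5} is a PARALLEL of
     (set (reg:SI 4) (mem:SI (reg:SI 0)))
     (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))
   which this predicate accepts with LOAD true, MODE SImode and
   CONSECUTIVE false: the offsets step by 4 and the destination register
   numbers are ascending.  (On Thumb-1 the base register would additionally
   have to be written back or appear in the register list.)  */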
13469 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13470 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13471 instruction. ADD_OFFSET is nonzero if the base address register needs
13472 to be modified with an add instruction before we can use it. */
13474 static bool
13475 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13476 int nops, HOST_WIDE_INT add_offset)
13478 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13479 if the offset isn't small enough. The reason 2 ldrs are faster
13480 is because these ARMs are able to do more than one cache access
13481 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13482 whilst the ARM8 has a double bandwidth cache. This means that
13483 these cores can do both an instruction fetch and a data fetch in
13484 a single cycle, so the trick of calculating the address into a
13485 scratch register (one of the result regs) and then doing a load
13486 multiple actually becomes slower (and no smaller in code size).
13487 That is the transformation
13489 ldr rd1, [rbase + offset]
13490 ldr rd2, [rbase + offset + 4]
13494 add rd1, rbase, offset
13495 ldmia rd1, {rd1, rd2}
13497 produces worse code -- '3 cycles + any stalls on rd2' instead of
13498 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13499 access per cycle, the first sequence could never complete in less
13500 than 6 cycles, whereas the ldm sequence would only take 5 and
13501 would make better use of sequential accesses if not hitting the
13502 cache.
13504 We cheat here and test 'arm_ld_sched' which we currently know to
13505 only be true for the ARM8, ARM9 and StrongARM. If this ever
13506 changes, then the test below needs to be reworked. */
13507 if (nops == 2 && arm_ld_sched && add_offset != 0)
13508 return false;
13510 /* XScale has load-store double instructions, but they have stricter
13511 alignment requirements than load-store multiple, so we cannot
13512 use them.
13514 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13515 the pipeline until completion.
13517 NREGS CYCLES
1 3
2 4
3 5
4 6
13523 An ldr instruction takes 1-3 cycles, but does not block the
13524 pipeline.
13526 NREGS CYCLES
13527 1 1-3
13528 2 2-6
13529 3 3-9
13530 4 4-12
13532 Best case ldr will always win. However, the more ldr instructions
13533 we issue, the less likely we are to be able to schedule them well.
13534 Using ldr instructions also increases code size.
13536 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13537 for counts of 3 or 4 regs. */
13538 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13539 return false;
13540 return true;
13543 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13544 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13545 an array ORDER which describes the sequence to use when accessing the
13546 offsets that produces an ascending order. In this sequence, each
13547 offset must be larger by exactly 4 than the previous one. ORDER[0]
13548 must have been filled in with the lowest offset by the caller.
13549 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13550 we use to verify that ORDER produces an ascending order of registers.
13551 Return true if it was possible to construct such an order, false if
13552 not. */
13554 static bool
13555 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13556 int *unsorted_regs)
13558 int i;
13559 for (i = 1; i < nops; i++)
13561 int j;
13563 order[i] = order[i - 1];
13564 for (j = 0; j < nops; j++)
13565 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13567 /* We must find exactly one offset that is higher than the
13568 previous one by 4. */
13569 if (order[i] != order[i - 1])
13570 return false;
13571 order[i] = j;
13573 if (order[i] == order[i - 1])
13574 return false;
13575 /* The register numbers must be ascending. */
13576 if (unsorted_regs != NULL
13577 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13578 return false;
13580 return true;
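/* Worked example (offsets and registers assumed): with UNSORTED_OFFSETS
   = { 8, 0, 4, 12 } the caller sets ORDER[0] = 1; the loop then fills in
   ORDER = { 1, 2, 0, 3 }, i.e. offsets 0, 4, 8, 12.  With UNSORTED_REGS
   = { 6, 4, 5, 7 } that order also visits the registers as 4, 5, 6, 7,
   so the function returns true; had r4 and r6 been swapped it would
   return false.  */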
13583 /* Used to determine in a peephole whether a sequence of load
13584 instructions can be changed into a load-multiple instruction.
13585 NOPS is the number of separate load instructions we are examining. The
13586 first NOPS entries in OPERANDS are the destination registers, the
13587 next NOPS entries are memory operands. If this function is
13588 successful, *BASE is set to the common base register of the memory
13589 accesses; *LOAD_OFFSET is set to the first memory location's offset
13590 from that base register.
13591 REGS is an array filled in with the destination register numbers.
13592 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13593 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13594 the sequence of registers in REGS matches the loads from ascending memory
13595 locations, and the function verifies that the register numbers are
13596 themselves ascending. If CHECK_REGS is false, the register numbers
13597 are stored in the order they are found in the operands. */
13598 static int
13599 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13600 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13602 int unsorted_regs[MAX_LDM_STM_OPS];
13603 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13604 int order[MAX_LDM_STM_OPS];
13605 rtx base_reg_rtx = NULL;
13606 int base_reg = -1;
13607 int i, ldm_case;
13609 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13610 easily extended if required. */
13611 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13613 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13615 /* Loop over the operands and check that the memory references are
13616 suitable (i.e. immediate offsets from the same base register). At
13617 the same time, extract the target register, and the memory
13618 offsets. */
13619 for (i = 0; i < nops; i++)
13621 rtx reg;
13622 rtx offset;
13624 /* Convert a subreg of a mem into the mem itself. */
13625 if (GET_CODE (operands[nops + i]) == SUBREG)
13626 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13628 gcc_assert (MEM_P (operands[nops + i]));
13630 /* Don't reorder volatile memory references; it doesn't seem worth
13631 looking for the case where the order is ok anyway. */
13632 if (MEM_VOLATILE_P (operands[nops + i]))
13633 return 0;
13635 offset = const0_rtx;
13637 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13638 || (GET_CODE (reg) == SUBREG
13639 && REG_P (reg = SUBREG_REG (reg))))
13640 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13641 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13642 || (GET_CODE (reg) == SUBREG
13643 && REG_P (reg = SUBREG_REG (reg))))
13644 && (CONST_INT_P (offset
13645 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13647 if (i == 0)
13649 base_reg = REGNO (reg);
13650 base_reg_rtx = reg;
13651 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13652 return 0;
13654 else if (base_reg != (int) REGNO (reg))
13655 /* Not addressed from the same base register. */
13656 return 0;
13658 unsorted_regs[i] = (REG_P (operands[i])
13659 ? REGNO (operands[i])
13660 : REGNO (SUBREG_REG (operands[i])));
13662 /* If it isn't an integer register, or if it overwrites the
13663 base register but isn't the last insn in the list, then
13664 we can't do this. */
13665 if (unsorted_regs[i] < 0
13666 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13667 || unsorted_regs[i] > 14
13668 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13669 return 0;
13671 /* Don't allow SP to be loaded unless it is also the base
13672 register. It guarantees that SP is reset correctly when
13673 an LDM instruction is interrupted. Otherwise, we might
13674 end up with a corrupt stack. */
13675 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13676 return 0;
13678 unsorted_offsets[i] = INTVAL (offset);
13679 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13680 order[0] = i;
13682 else
13683 /* Not a suitable memory address. */
13684 return 0;
13687 /* All the useful information has now been extracted from the
13688 operands into unsorted_regs and unsorted_offsets; additionally,
13689 order[0] has been set to the lowest offset in the list. Sort
13690 the offsets into order, verifying that they are adjacent, and
13691 check that the register numbers are ascending. */
13692 if (!compute_offset_order (nops, unsorted_offsets, order,
13693 check_regs ? unsorted_regs : NULL))
13694 return 0;
13696 if (saved_order)
13697 memcpy (saved_order, order, sizeof order);
13699 if (base)
13701 *base = base_reg;
13703 for (i = 0; i < nops; i++)
13704 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13706 *load_offset = unsorted_offsets[order[0]];
13709 if (TARGET_THUMB1
13710 && !peep2_reg_dead_p (nops, base_reg_rtx))
13711 return 0;
13713 if (unsorted_offsets[order[0]] == 0)
13714 ldm_case = 1; /* ldmia */
13715 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13716 ldm_case = 2; /* ldmib */
13717 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13718 ldm_case = 3; /* ldmda */
13719 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13720 ldm_case = 4; /* ldmdb */
13721 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13722 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13723 ldm_case = 5;
13724 else
13725 return 0;
13727 if (!multiple_operation_profitable_p (false, nops,
13728 ldm_case == 5
13729 ? unsorted_offsets[order[0]] : 0))
13730 return 0;
13732 return ldm_case;
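/* Editor's note: the function below is an illustrative, standalone sketch
   and is not part of the original source.  It restates how LDM_CASE is
   chosen above from the lowest and highest offsets, omitting the
   TARGET_ARM/TARGET_32BIT and const_ok_for_arm checks; the function name
   is hypothetical.  */
static int
ldm_case_example (int first_offset, int last_offset)
{
  if (first_offset == 0)
    return 1;	/* ldmia: the base points at the first word.  */
  if (first_offset == 4)
    return 2;	/* ldmib (ARM only): the base sits just below the first word.  */
  if (last_offset == 0)
    return 3;	/* ldmda (ARM only): the base points at the last word.  */
  if (last_offset == -4)
    return 4;	/* ldmdb (32-bit only): the base sits just above the last word.  */
  return 5;	/* case 5 above: add the offset to the base register first.  */
}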
13735 /* Used to determine in a peephole whether a sequence of store instructions can
13736 be changed into a store-multiple instruction.
13737 NOPS is the number of separate store instructions we are examining.
13738 NOPS_TOTAL is the total number of instructions recognized by the peephole
13739 pattern.
13740 The first NOPS entries in OPERANDS are the source registers, the next
13741 NOPS entries are memory operands. If this function is successful, *BASE is
13742 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13743 to the first memory location's offset from that base register. REGS is an
13744 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13745 likewise filled with the corresponding rtx's.
13746 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13747 numbers to an ascending order of stores.
13748 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13749 from ascending memory locations, and the function verifies that the register
13750 numbers are themselves ascending. If CHECK_REGS is false, the register
13751 numbers are stored in the order they are found in the operands. */
13752 static int
13753 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13754 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13755 HOST_WIDE_INT *load_offset, bool check_regs)
13757 int unsorted_regs[MAX_LDM_STM_OPS];
13758 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13759 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13760 int order[MAX_LDM_STM_OPS];
13761 int base_reg = -1;
13762 rtx base_reg_rtx = NULL;
13763 int i, stm_case;
13765 /* Write back of base register is currently only supported for Thumb 1. */
13766 int base_writeback = TARGET_THUMB1;
13768 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13769 easily extended if required. */
13770 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13772 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13774 /* Loop over the operands and check that the memory references are
13775 suitable (i.e. immediate offsets from the same base register). At
13776 the same time, extract the target register, and the memory
13777 offsets. */
13778 for (i = 0; i < nops; i++)
13780 rtx reg;
13781 rtx offset;
13783 /* Convert a subreg of a mem into the mem itself. */
13784 if (GET_CODE (operands[nops + i]) == SUBREG)
13785 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13787 gcc_assert (MEM_P (operands[nops + i]));
13789 /* Don't reorder volatile memory references; it doesn't seem worth
13790 looking for the case where the order is ok anyway. */
13791 if (MEM_VOLATILE_P (operands[nops + i]))
13792 return 0;
13794 offset = const0_rtx;
13796 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13797 || (GET_CODE (reg) == SUBREG
13798 && REG_P (reg = SUBREG_REG (reg))))
13799 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13800 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13801 || (GET_CODE (reg) == SUBREG
13802 && REG_P (reg = SUBREG_REG (reg))))
13803 && (CONST_INT_P (offset
13804 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13806 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13807 ? operands[i] : SUBREG_REG (operands[i]));
13808 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13810 if (i == 0)
13812 base_reg = REGNO (reg);
13813 base_reg_rtx = reg;
13814 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13815 return 0;
13817 else if (base_reg != (int) REGNO (reg))
13818 /* Not addressed from the same base register. */
13819 return 0;
13821 /* If it isn't an integer register, then we can't do this. */
13822 if (unsorted_regs[i] < 0
13823 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13824 /* The effects are unpredictable if the base register is
13825 both updated and stored. */
13826 || (base_writeback && unsorted_regs[i] == base_reg)
13827 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13828 || unsorted_regs[i] > 14)
13829 return 0;
13831 unsorted_offsets[i] = INTVAL (offset);
13832 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13833 order[0] = i;
13835 else
13836 /* Not a suitable memory address. */
13837 return 0;
13840 /* All the useful information has now been extracted from the
13841 operands into unsorted_regs and unsorted_offsets; additionally,
13842 order[0] has been set to the lowest offset in the list. Sort
13843 the offsets into order, verifying that they are adjacent, and
13844 check that the register numbers are ascending. */
13845 if (!compute_offset_order (nops, unsorted_offsets, order,
13846 check_regs ? unsorted_regs : NULL))
13847 return 0;
13849 if (saved_order)
13850 memcpy (saved_order, order, sizeof order);
13852 if (base)
13854 *base = base_reg;
13856 for (i = 0; i < nops; i++)
13858 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13859 if (reg_rtxs)
13860 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13863 *load_offset = unsorted_offsets[order[0]];
13866 if (TARGET_THUMB1
13867 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13868 return 0;
13870 if (unsorted_offsets[order[0]] == 0)
13871 stm_case = 1; /* stmia */
13872 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13873 stm_case = 2; /* stmib */
13874 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13875 stm_case = 3; /* stmda */
13876 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13877 stm_case = 4; /* stmdb */
13878 else
13879 return 0;
13881 if (!multiple_operation_profitable_p (false, nops, 0))
13882 return 0;
13884 return stm_case;
13887 /* Routines for use in generating RTL. */
13889 /* Generate a load-multiple instruction. COUNT is the number of loads in
13890 the instruction; REGS and MEMS are arrays containing the operands.
13891 BASEREG is the base register to be used in addressing the memory operands.
13892 WBACK_OFFSET is nonzero if the instruction should update the base
13893 register. */
13895 static rtx
13896 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13897 HOST_WIDE_INT wback_offset)
13899 int i = 0, j;
13900 rtx result;
13902 if (!multiple_operation_profitable_p (false, count, 0))
13904 rtx seq;
13906 start_sequence ();
13908 for (i = 0; i < count; i++)
13909 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13911 if (wback_offset != 0)
13912 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13914 seq = get_insns ();
13915 end_sequence ();
13917 return seq;
13920 result = gen_rtx_PARALLEL (VOIDmode,
13921 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13922 if (wback_offset != 0)
13924 XVECEXP (result, 0, 0)
13925 = gen_rtx_SET (VOIDmode, basereg,
13926 plus_constant (Pmode, basereg, wback_offset));
13927 i = 1;
13928 count++;
13931 for (j = 0; i < count; i++, j++)
13932 XVECEXP (result, 0, i)
13933 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13935 return result;
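/* Editor's note: an illustrative example, not part of the original source.
   For COUNT == 3, REGS == {4, 5, 6} and WBACK_OFFSET == 12 the PARALLEL
   built above has the shape

     (parallel [(set (reg base) (plus (reg base) (const_int 12)))
                (set (reg:SI 4) (mem 0))
                (set (reg:SI 5) (mem 1))
                (set (reg:SI 6) (mem 2))])

   i.e. the optional base-register update followed by one SET per loaded
   register, which is the form the load-multiple patterns expect.  */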
13938 /* Generate a store-multiple instruction. COUNT is the number of stores in
13939 the instruction; REGS and MEMS are arrays containing the operands.
13940 BASEREG is the base register to be used in addressing the memory operands.
13941 WBACK_OFFSET is nonzero if the instruction should update the base
13942 register. */
13944 static rtx
13945 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13946 HOST_WIDE_INT wback_offset)
13948 int i = 0, j;
13949 rtx result;
13951 if (GET_CODE (basereg) == PLUS)
13952 basereg = XEXP (basereg, 0);
13954 if (!multiple_operation_profitable_p (false, count, 0))
13956 rtx seq;
13958 start_sequence ();
13960 for (i = 0; i < count; i++)
13961 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13963 if (wback_offset != 0)
13964 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13966 seq = get_insns ();
13967 end_sequence ();
13969 return seq;
13972 result = gen_rtx_PARALLEL (VOIDmode,
13973 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13974 if (wback_offset != 0)
13976 XVECEXP (result, 0, 0)
13977 = gen_rtx_SET (VOIDmode, basereg,
13978 plus_constant (Pmode, basereg, wback_offset));
13979 i = 1;
13980 count++;
13983 for (j = 0; i < count; i++, j++)
13984 XVECEXP (result, 0, i)
13985 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13987 return result;
13990 /* Generate either a load-multiple or a store-multiple instruction. This
13991 function can be used in situations where we can start with a single MEM
13992 rtx and adjust its address upwards.
13993 COUNT is the number of operations in the instruction, not counting a
13994 possible update of the base register. REGS is an array containing the
13995 register operands.
13996 BASEREG is the base register to be used in addressing the memory operands,
13997 which are constructed from BASEMEM.
13998 WRITE_BACK specifies whether the generated instruction should include an
13999 update of the base register.
14000 OFFSETP is used to pass an offset to and from this function; this offset
14001 is not used when constructing the address (instead BASEMEM should have an
14002 appropriate offset in its address); it is used only for setting
14003 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14005 static rtx
14006 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14007 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14009 rtx mems[MAX_LDM_STM_OPS];
14010 HOST_WIDE_INT offset = *offsetp;
14011 int i;
14013 gcc_assert (count <= MAX_LDM_STM_OPS);
14015 if (GET_CODE (basereg) == PLUS)
14016 basereg = XEXP (basereg, 0);
14018 for (i = 0; i < count; i++)
14020 rtx addr = plus_constant (Pmode, basereg, i * 4);
14021 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14022 offset += 4;
14025 if (write_back)
14026 *offsetp = offset;
14028 if (is_load)
14029 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14030 write_back ? 4 * count : 0);
14031 else
14032 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14033 write_back ? 4 * count : 0);
14036 rtx
14037 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14038 rtx basemem, HOST_WIDE_INT *offsetp)
14040 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14041 offsetp);
14044 rtx
14045 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14046 rtx basemem, HOST_WIDE_INT *offsetp)
14048 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14049 offsetp);
14052 /* Called from a peephole2 expander to turn a sequence of loads into an
14053 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14054 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14055 is true if we can reorder the registers because their subsequent uses
14056 are commutative.
14057 Returns true iff we could generate a new instruction. */
14059 bool
14060 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14062 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14063 rtx mems[MAX_LDM_STM_OPS];
14064 int i, j, base_reg;
14065 rtx base_reg_rtx;
14066 HOST_WIDE_INT offset;
14067 int write_back = FALSE;
14068 int ldm_case;
14069 rtx addr;
14071 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14072 &base_reg, &offset, !sort_regs);
14074 if (ldm_case == 0)
14075 return false;
14077 if (sort_regs)
14078 for (i = 0; i < nops - 1; i++)
14079 for (j = i + 1; j < nops; j++)
14080 if (regs[i] > regs[j])
14082 int t = regs[i];
14083 regs[i] = regs[j];
14084 regs[j] = t;
14086 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14088 if (TARGET_THUMB1)
14090 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14091 gcc_assert (ldm_case == 1 || ldm_case == 5);
14092 write_back = TRUE;
14095 if (ldm_case == 5)
14097 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14098 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14099 offset = 0;
14100 if (!TARGET_THUMB1)
14102 base_reg = regs[0];
14103 base_reg_rtx = newbase;
14107 for (i = 0; i < nops; i++)
14109 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14110 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14111 SImode, addr, 0);
14113 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14114 write_back ? offset + i * 4 : 0));
14115 return true;
14118 /* Called from a peephole2 expander to turn a sequence of stores into an
14119 STM instruction. OPERANDS are the operands found by the peephole matcher;
14120 NOPS indicates how many separate stores we are trying to combine.
14121 Returns true iff we could generate a new instruction. */
14123 bool
14124 gen_stm_seq (rtx *operands, int nops)
14126 int i;
14127 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14128 rtx mems[MAX_LDM_STM_OPS];
14129 int base_reg;
14130 rtx base_reg_rtx;
14131 HOST_WIDE_INT offset;
14132 int write_back = FALSE;
14133 int stm_case;
14134 rtx addr;
14135 bool base_reg_dies;
14137 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14138 mem_order, &base_reg, &offset, true);
14140 if (stm_case == 0)
14141 return false;
14143 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14145 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14146 if (TARGET_THUMB1)
14148 gcc_assert (base_reg_dies);
14149 write_back = TRUE;
14152 if (stm_case == 5)
14154 gcc_assert (base_reg_dies);
14155 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14156 offset = 0;
14159 addr = plus_constant (Pmode, base_reg_rtx, offset);
14161 for (i = 0; i < nops; i++)
14163 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14164 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14165 SImode, addr, 0);
14167 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14168 write_back ? offset + i * 4 : 0));
14169 return true;
14172 /* Called from a peephole2 expander to turn a sequence of stores that are
14173 preceded by constant loads into an STM instruction. OPERANDS are the
14174 operands found by the peephole matcher; NOPS indicates how many
14175 separate stores we are trying to combine; there are 2 * NOPS
14176 instructions in the peephole.
14177 Returns true iff we could generate a new instruction. */
14179 bool
14180 gen_const_stm_seq (rtx *operands, int nops)
14182 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14183 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14184 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14185 rtx mems[MAX_LDM_STM_OPS];
14186 int base_reg;
14187 rtx base_reg_rtx;
14188 HOST_WIDE_INT offset;
14189 int write_back = FALSE;
14190 int stm_case;
14191 rtx addr;
14192 bool base_reg_dies;
14193 int i, j;
14194 HARD_REG_SET allocated;
14196 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14197 mem_order, &base_reg, &offset, false);
14199 if (stm_case == 0)
14200 return false;
14202 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14204 /* If the same register is used more than once, try to find a free
14205 register. */
14206 CLEAR_HARD_REG_SET (allocated);
14207 for (i = 0; i < nops; i++)
14209 for (j = i + 1; j < nops; j++)
14210 if (regs[i] == regs[j])
14212 rtx t = peep2_find_free_register (0, nops * 2,
14213 TARGET_THUMB1 ? "l" : "r",
14214 SImode, &allocated);
14215 if (t == NULL_RTX)
14216 return false;
14217 reg_rtxs[i] = t;
14218 regs[i] = REGNO (t);
14222 /* Compute an ordering that maps the register numbers to an ascending
14223 sequence. */
14224 reg_order[0] = 0;
14225 for (i = 0; i < nops; i++)
14226 if (regs[i] < regs[reg_order[0]])
14227 reg_order[0] = i;
14229 for (i = 1; i < nops; i++)
14231 int this_order = reg_order[i - 1];
14232 for (j = 0; j < nops; j++)
14233 if (regs[j] > regs[reg_order[i - 1]]
14234 && (this_order == reg_order[i - 1]
14235 || regs[j] < regs[this_order]))
14236 this_order = j;
14237 reg_order[i] = this_order;
14240 /* Ensure that registers that must be live after the instruction end
14241 up with the correct value. */
14242 for (i = 0; i < nops; i++)
14244 int this_order = reg_order[i];
14245 if ((this_order != mem_order[i]
14246 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14247 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14248 return false;
14251 /* Load the constants. */
14252 for (i = 0; i < nops; i++)
14254 rtx op = operands[2 * nops + mem_order[i]];
14255 sorted_regs[i] = regs[reg_order[i]];
14256 emit_move_insn (reg_rtxs[reg_order[i]], op);
14259 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14261 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14262 if (TARGET_THUMB1)
14264 gcc_assert (base_reg_dies);
14265 write_back = TRUE;
14268 if (stm_case == 5)
14270 gcc_assert (base_reg_dies);
14271 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14272 offset = 0;
14275 addr = plus_constant (Pmode, base_reg_rtx, offset);
14277 for (i = 0; i < nops; i++)
14279 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14280 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14281 SImode, addr, 0);
14283 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14284 write_back ? offset + i * 4 : 0));
14285 return true;
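/* Editor's note: an illustrative worked example, not part of the original
   source.  For REGS == {3, 1, 2} the ordering loop above produces
   REG_ORDER == {1, 2, 0}, so SORTED_REGS becomes {1, 2, 3} and the store
   multiple writes the registers to memory in ascending order.  */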
14288 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14289 unaligned copies on processors which support unaligned semantics for those
14290 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14291 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14292 An interleave factor of 1 (the minimum) will perform no interleaving.
14293 Load/store multiple are used for aligned addresses where possible. */
14295 static void
14296 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14297 HOST_WIDE_INT length,
14298 unsigned int interleave_factor)
14300 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14301 int *regnos = XALLOCAVEC (int, interleave_factor);
14302 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14303 HOST_WIDE_INT i, j;
14304 HOST_WIDE_INT remaining = length, words;
14305 rtx halfword_tmp = NULL, byte_tmp = NULL;
14306 rtx dst, src;
14307 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14308 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14309 HOST_WIDE_INT srcoffset, dstoffset;
14310 HOST_WIDE_INT src_autoinc, dst_autoinc;
14311 rtx mem, addr;
14313 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14315 /* Use hard registers if we have aligned source or destination so we can use
14316 load/store multiple with contiguous registers. */
14317 if (dst_aligned || src_aligned)
14318 for (i = 0; i < interleave_factor; i++)
14319 regs[i] = gen_rtx_REG (SImode, i);
14320 else
14321 for (i = 0; i < interleave_factor; i++)
14322 regs[i] = gen_reg_rtx (SImode);
14324 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14325 src = copy_addr_to_reg (XEXP (srcbase, 0));
14327 srcoffset = dstoffset = 0;
14329 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14330 For copying the last bytes we want to subtract this offset again. */
14331 src_autoinc = dst_autoinc = 0;
14333 for (i = 0; i < interleave_factor; i++)
14334 regnos[i] = i;
14336 /* Copy BLOCK_SIZE_BYTES chunks. */
14338 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14340 /* Load words. */
14341 if (src_aligned && interleave_factor > 1)
14343 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14344 TRUE, srcbase, &srcoffset));
14345 src_autoinc += UNITS_PER_WORD * interleave_factor;
14347 else
14349 for (j = 0; j < interleave_factor; j++)
14351 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14352 - src_autoinc));
14353 mem = adjust_automodify_address (srcbase, SImode, addr,
14354 srcoffset + j * UNITS_PER_WORD);
14355 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14357 srcoffset += block_size_bytes;
14360 /* Store words. */
14361 if (dst_aligned && interleave_factor > 1)
14363 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14364 TRUE, dstbase, &dstoffset));
14365 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14367 else
14369 for (j = 0; j < interleave_factor; j++)
14371 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14372 - dst_autoinc));
14373 mem = adjust_automodify_address (dstbase, SImode, addr,
14374 dstoffset + j * UNITS_PER_WORD);
14375 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14377 dstoffset += block_size_bytes;
14380 remaining -= block_size_bytes;
14383 /* Copy any whole words left (note these aren't interleaved with any
14384 subsequent halfword/byte load/stores in the interests of simplicity). */
14386 words = remaining / UNITS_PER_WORD;
14388 gcc_assert (words < interleave_factor);
14390 if (src_aligned && words > 1)
14392 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14393 &srcoffset));
14394 src_autoinc += UNITS_PER_WORD * words;
14396 else
14398 for (j = 0; j < words; j++)
14400 addr = plus_constant (Pmode, src,
14401 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14402 mem = adjust_automodify_address (srcbase, SImode, addr,
14403 srcoffset + j * UNITS_PER_WORD);
14404 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14406 srcoffset += words * UNITS_PER_WORD;
14409 if (dst_aligned && words > 1)
14411 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14412 &dstoffset));
14413 dst_autoinc += words * UNITS_PER_WORD;
14415 else
14417 for (j = 0; j < words; j++)
14419 addr = plus_constant (Pmode, dst,
14420 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14421 mem = adjust_automodify_address (dstbase, SImode, addr,
14422 dstoffset + j * UNITS_PER_WORD);
14423 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14425 dstoffset += words * UNITS_PER_WORD;
14428 remaining -= words * UNITS_PER_WORD;
14430 gcc_assert (remaining < 4);
14432 /* Copy a halfword if necessary. */
14434 if (remaining >= 2)
14436 halfword_tmp = gen_reg_rtx (SImode);
14438 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14439 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14440 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14442 /* Either write out immediately, or delay until we've loaded the last
14443 byte, depending on interleave factor. */
14444 if (interleave_factor == 1)
14446 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14447 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14448 emit_insn (gen_unaligned_storehi (mem,
14449 gen_lowpart (HImode, halfword_tmp)));
14450 halfword_tmp = NULL;
14451 dstoffset += 2;
14454 remaining -= 2;
14455 srcoffset += 2;
14458 gcc_assert (remaining < 2);
14460 /* Copy last byte. */
14462 if ((remaining & 1) != 0)
14464 byte_tmp = gen_reg_rtx (SImode);
14466 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14467 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14468 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14470 if (interleave_factor == 1)
14472 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14473 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14474 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14475 byte_tmp = NULL;
14476 dstoffset++;
14479 remaining--;
14480 srcoffset++;
14483 /* Store last halfword if we haven't done so already. */
14485 if (halfword_tmp)
14487 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14488 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14489 emit_insn (gen_unaligned_storehi (mem,
14490 gen_lowpart (HImode, halfword_tmp)));
14491 dstoffset += 2;
14494 /* Likewise for last byte. */
14496 if (byte_tmp)
14498 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14499 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14500 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14501 dstoffset++;
14504 gcc_assert (remaining == 0 && srcoffset == dstoffset);
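/* Editor's note: the code below is an illustrative, standalone sketch and
   is not part of the original source.  It models the decomposition
   performed above, assuming 4-byte words; the struct and function names
   are hypothetical.  For example, a length of 23 with an interleave factor
   of 2 gives 2 eight-byte blocks, 1 leftover word, a halfword and a byte.  */
struct copy_plan_example
{
  long blocks;		/* full blocks of INTERLEAVE_FACTOR words.  */
  long words;		/* whole words left over.  */
  int halfword;		/* nonzero if a 2-byte tail copy is needed.  */
  int byte;		/* nonzero if a final 1-byte copy is needed.  */
};

static struct copy_plan_example
plan_unaligned_copy_example (long length, unsigned int interleave_factor)
{
  struct copy_plan_example plan;
  long block_size = 4 * (long) interleave_factor;

  plan.blocks = length / block_size;
  length -= plan.blocks * block_size;
  plan.words = length / 4;
  length -= plan.words * 4;
  plan.halfword = (length >= 2);
  plan.byte = ((length & 1) != 0);
  return plan;
}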
14507 /* From mips_adjust_block_mem:
14509 Helper function for doing a loop-based block operation on memory
14510 reference MEM. Each iteration of the loop will operate on LENGTH
14511 bytes of MEM.
14513 Create a new base register for use within the loop and point it to
14514 the start of MEM. Create a new memory reference that uses this
14515 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14517 static void
14518 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14519 rtx *loop_mem)
14521 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14523 /* Although the new mem does not refer to a known location,
14524 it does keep up to LENGTH bytes of alignment. */
14525 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14526 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14529 /* From mips_block_move_loop:
14531 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14532 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14533 the memory regions do not overlap. */
14535 static void
14536 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14537 unsigned int interleave_factor,
14538 HOST_WIDE_INT bytes_per_iter)
14540 rtx src_reg, dest_reg, final_src, test;
14541 HOST_WIDE_INT leftover;
14543 leftover = length % bytes_per_iter;
14544 length -= leftover;
14546 /* Create registers and memory references for use within the loop. */
14547 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14548 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14550 /* Calculate the value that SRC_REG should have after the last iteration of
14551 the loop. */
14552 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14553 0, 0, OPTAB_WIDEN);
14555 /* Emit the start of the loop. */
14556 rtx_code_label *label = gen_label_rtx ();
14557 emit_label (label);
14559 /* Emit the loop body. */
14560 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14561 interleave_factor);
14563 /* Move on to the next block. */
14564 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14565 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14567 /* Emit the loop condition. */
14568 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14569 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14571 /* Mop up any left-over bytes. */
14572 if (leftover)
14573 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14576 /* Emit a block move when either the source or destination is unaligned (not
14577 aligned to a four-byte boundary). This may need further tuning depending on
14578 core type, optimize_size setting, etc. */
14580 static int
14581 arm_movmemqi_unaligned (rtx *operands)
14583 HOST_WIDE_INT length = INTVAL (operands[2]);
14585 if (optimize_size)
14587 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14588 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14589 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14590 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14591 or dst_aligned though: allow more interleaving in those cases since the
14592 resulting code can be smaller. */
14593 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14594 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14596 if (length > 12)
14597 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14598 interleave_factor, bytes_per_iter);
14599 else
14600 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14601 interleave_factor);
14603 else
14605 /* Note that the loop created by arm_block_move_unaligned_loop may be
14606 subject to loop unrolling, which makes tuning this condition a little
14607 redundant. */
14608 if (length > 32)
14609 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14610 else
14611 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14614 return 1;
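/* Editor's note: an illustrative summary, not part of the original source.
   With -Os, a 10-byte unaligned copy is emitted straight-line while a
   20-byte copy uses the loop (the threshold is 12 bytes), moving 4 or 8
   bytes per iteration depending on whether either side is word-aligned.
   Without -Os, the loop is only used above 32 bytes and moves 16 bytes
   per iteration with an interleave factor of 4.  */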
14617 int
14618 arm_gen_movmemqi (rtx *operands)
14620 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14621 HOST_WIDE_INT srcoffset, dstoffset;
14622 int i;
14623 rtx src, dst, srcbase, dstbase;
14624 rtx part_bytes_reg = NULL;
14625 rtx mem;
14627 if (!CONST_INT_P (operands[2])
14628 || !CONST_INT_P (operands[3])
14629 || INTVAL (operands[2]) > 64)
14630 return 0;
14632 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14633 return arm_movmemqi_unaligned (operands);
14635 if (INTVAL (operands[3]) & 3)
14636 return 0;
14638 dstbase = operands[0];
14639 srcbase = operands[1];
14641 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14642 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14644 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14645 out_words_to_go = INTVAL (operands[2]) / 4;
14646 last_bytes = INTVAL (operands[2]) & 3;
14647 dstoffset = srcoffset = 0;
14649 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14650 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14652 for (i = 0; in_words_to_go >= 2; i+=4)
14654 if (in_words_to_go > 4)
14655 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14656 TRUE, srcbase, &srcoffset));
14657 else
14658 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14659 src, FALSE, srcbase,
14660 &srcoffset));
14662 if (out_words_to_go)
14664 if (out_words_to_go > 4)
14665 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14666 TRUE, dstbase, &dstoffset));
14667 else if (out_words_to_go != 1)
14668 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14669 out_words_to_go, dst,
14670 (last_bytes == 0
14671 ? FALSE : TRUE),
14672 dstbase, &dstoffset));
14673 else
14675 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14676 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14677 if (last_bytes != 0)
14679 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14680 dstoffset += 4;
14685 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14686 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14689 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14690 if (out_words_to_go)
14692 rtx sreg;
14694 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14695 sreg = copy_to_reg (mem);
14697 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14698 emit_move_insn (mem, sreg);
14699 in_words_to_go--;
14701 gcc_assert (!in_words_to_go); /* Sanity check */
14704 if (in_words_to_go)
14706 gcc_assert (in_words_to_go > 0);
14708 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14709 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14712 gcc_assert (!last_bytes || part_bytes_reg);
14714 if (BYTES_BIG_ENDIAN && last_bytes)
14716 rtx tmp = gen_reg_rtx (SImode);
14718 /* The bytes we want are in the top end of the word. */
14719 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14720 GEN_INT (8 * (4 - last_bytes))));
14721 part_bytes_reg = tmp;
14723 while (last_bytes)
14725 mem = adjust_automodify_address (dstbase, QImode,
14726 plus_constant (Pmode, dst,
14727 last_bytes - 1),
14728 dstoffset + last_bytes - 1);
14729 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14731 if (--last_bytes)
14733 tmp = gen_reg_rtx (SImode);
14734 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14735 part_bytes_reg = tmp;
14740 else
14742 if (last_bytes > 1)
14744 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14745 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14746 last_bytes -= 2;
14747 if (last_bytes)
14749 rtx tmp = gen_reg_rtx (SImode);
14750 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14751 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14752 part_bytes_reg = tmp;
14753 dstoffset += 2;
14757 if (last_bytes)
14759 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14760 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14764 return 1;
14767 /* Helper for gen_movmem_ldrd_strd. Return a copy of the memory rtx MEM
14768 with its address advanced by the size of MEM's mode. */
14769 inline static rtx
14770 next_consecutive_mem (rtx mem)
14772 machine_mode mode = GET_MODE (mem);
14773 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14774 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14776 return adjust_automodify_address (mem, mode, addr, offset);
14779 /* Copy using LDRD/STRD instructions whenever possible.
14780 Returns true upon success. */
14781 bool
14782 gen_movmem_ldrd_strd (rtx *operands)
14784 unsigned HOST_WIDE_INT len;
14785 HOST_WIDE_INT align;
14786 rtx src, dst, base;
14787 rtx reg0;
14788 bool src_aligned, dst_aligned;
14789 bool src_volatile, dst_volatile;
14791 gcc_assert (CONST_INT_P (operands[2]));
14792 gcc_assert (CONST_INT_P (operands[3]));
14794 len = UINTVAL (operands[2]);
14795 if (len > 64)
14796 return false;
14798 /* Maximum alignment we can assume for both src and dst buffers. */
14799 align = INTVAL (operands[3]);
14801 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14802 return false;
14804 /* Place src and dst addresses in registers
14805 and update the corresponding mem rtx. */
14806 dst = operands[0];
14807 dst_volatile = MEM_VOLATILE_P (dst);
14808 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14809 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14810 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14812 src = operands[1];
14813 src_volatile = MEM_VOLATILE_P (src);
14814 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14815 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14816 src = adjust_automodify_address (src, VOIDmode, base, 0);
14818 if (!unaligned_access && !(src_aligned && dst_aligned))
14819 return false;
14821 if (src_volatile || dst_volatile)
14822 return false;
14824 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14825 if (!(dst_aligned || src_aligned))
14826 return arm_gen_movmemqi (operands);
14828 src = adjust_address (src, DImode, 0);
14829 dst = adjust_address (dst, DImode, 0);
14830 while (len >= 8)
14832 len -= 8;
14833 reg0 = gen_reg_rtx (DImode);
14834 if (src_aligned)
14835 emit_move_insn (reg0, src);
14836 else
14837 emit_insn (gen_unaligned_loaddi (reg0, src));
14839 if (dst_aligned)
14840 emit_move_insn (dst, reg0);
14841 else
14842 emit_insn (gen_unaligned_storedi (dst, reg0));
14844 src = next_consecutive_mem (src);
14845 dst = next_consecutive_mem (dst);
14848 gcc_assert (len < 8);
14849 if (len >= 4)
14851 /* More than a word but less than a double-word to copy. Copy a word. */
14852 reg0 = gen_reg_rtx (SImode);
14853 src = adjust_address (src, SImode, 0);
14854 dst = adjust_address (dst, SImode, 0);
14855 if (src_aligned)
14856 emit_move_insn (reg0, src);
14857 else
14858 emit_insn (gen_unaligned_loadsi (reg0, src));
14860 if (dst_aligned)
14861 emit_move_insn (dst, reg0);
14862 else
14863 emit_insn (gen_unaligned_storesi (dst, reg0));
14865 src = next_consecutive_mem (src);
14866 dst = next_consecutive_mem (dst);
14867 len -= 4;
14870 if (len == 0)
14871 return true;
14873 /* Copy the remaining bytes. */
14874 if (len >= 2)
14876 dst = adjust_address (dst, HImode, 0);
14877 src = adjust_address (src, HImode, 0);
14878 reg0 = gen_reg_rtx (SImode);
14879 if (src_aligned)
14880 emit_insn (gen_zero_extendhisi2 (reg0, src));
14881 else
14882 emit_insn (gen_unaligned_loadhiu (reg0, src));
14884 if (dst_aligned)
14885 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14886 else
14887 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14889 src = next_consecutive_mem (src);
14890 dst = next_consecutive_mem (dst);
14891 if (len == 2)
14892 return true;
14895 dst = adjust_address (dst, QImode, 0);
14896 src = adjust_address (src, QImode, 0);
14897 reg0 = gen_reg_rtx (QImode);
14898 emit_move_insn (reg0, src);
14899 emit_move_insn (dst, reg0);
14900 return true;
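/* Editor's note: an illustrative worked example, not part of the original
   source.  For a 15-byte copy with both buffers word-aligned, the code
   above emits one DImode copy (intended to become LDRD/STRD), then a word
   copy, a halfword copy and a final byte copy: 8 + 4 + 2 + 1.  */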
14903 /* Select a dominance comparison mode if possible for a test of the general
14904 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14905 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14906 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14907 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14908 In all cases OP will be either EQ or NE, but we don't need to know which
14909 here. If we are unable to support a dominance comparison we return
14910 CC mode. This will then fail to match for the RTL expressions that
14911 generate this call. */
14912 machine_mode
14913 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14915 enum rtx_code cond1, cond2;
14916 int swapped = 0;
14918 /* Currently we will probably get the wrong result if the individual
14919 comparisons are not simple. This also ensures that it is safe to
14920 reverse a comparison if necessary. */
14921 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14922 != CCmode)
14923 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14924 != CCmode))
14925 return CCmode;
14927 /* The if_then_else variant of this tests the second condition if the
14928 first passes, but is true if the first fails. Reverse the first
14929 condition to get a true "inclusive-or" expression. */
14930 if (cond_or == DOM_CC_NX_OR_Y)
14931 cond1 = reverse_condition (cond1);
14933 /* If the comparisons are not equal, and one doesn't dominate the other,
14934 then we can't do this. */
14935 if (cond1 != cond2
14936 && !comparison_dominates_p (cond1, cond2)
14937 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14938 return CCmode;
14940 if (swapped)
14941 std::swap (cond1, cond2);
14943 switch (cond1)
14945 case EQ:
14946 if (cond_or == DOM_CC_X_AND_Y)
14947 return CC_DEQmode;
14949 switch (cond2)
14951 case EQ: return CC_DEQmode;
14952 case LE: return CC_DLEmode;
14953 case LEU: return CC_DLEUmode;
14954 case GE: return CC_DGEmode;
14955 case GEU: return CC_DGEUmode;
14956 default: gcc_unreachable ();
14959 case LT:
14960 if (cond_or == DOM_CC_X_AND_Y)
14961 return CC_DLTmode;
14963 switch (cond2)
14965 case LT:
14966 return CC_DLTmode;
14967 case LE:
14968 return CC_DLEmode;
14969 case NE:
14970 return CC_DNEmode;
14971 default:
14972 gcc_unreachable ();
14975 case GT:
14976 if (cond_or == DOM_CC_X_AND_Y)
14977 return CC_DGTmode;
14979 switch (cond2)
14981 case GT:
14982 return CC_DGTmode;
14983 case GE:
14984 return CC_DGEmode;
14985 case NE:
14986 return CC_DNEmode;
14987 default:
14988 gcc_unreachable ();
14991 case LTU:
14992 if (cond_or == DOM_CC_X_AND_Y)
14993 return CC_DLTUmode;
14995 switch (cond2)
14997 case LTU:
14998 return CC_DLTUmode;
14999 case LEU:
15000 return CC_DLEUmode;
15001 case NE:
15002 return CC_DNEmode;
15003 default:
15004 gcc_unreachable ();
15007 case GTU:
15008 if (cond_or == DOM_CC_X_AND_Y)
15009 return CC_DGTUmode;
15011 switch (cond2)
15013 case GTU:
15014 return CC_DGTUmode;
15015 case GEU:
15016 return CC_DGEUmode;
15017 case NE:
15018 return CC_DNEmode;
15019 default:
15020 gcc_unreachable ();
15023 /* The remaining cases only occur when both comparisons are the
15024 same. */
15025 case NE:
15026 gcc_assert (cond1 == cond2);
15027 return CC_DNEmode;
15029 case LE:
15030 gcc_assert (cond1 == cond2);
15031 return CC_DLEmode;
15033 case GE:
15034 gcc_assert (cond1 == cond2);
15035 return CC_DGEmode;
15037 case LEU:
15038 gcc_assert (cond1 == cond2);
15039 return CC_DLEUmode;
15041 case GEU:
15042 gcc_assert (cond1 == cond2);
15043 return CC_DGEUmode;
15045 default:
15046 gcc_unreachable ();
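/* Editor's note: illustrative examples, not part of the original source.
   For (a == b) || (a <= b), COND1 is EQ and COND2 is LE; EQ implies LE,
   so the pair is accepted and CC_DLEmode is returned.  For
   (a < b) && (a <= b) the result is CC_DLTmode.  For (a < b) || (a > b)
   neither condition implies the other, so CCmode is returned and the
   combined pattern fails to match.  */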
15050 machine_mode
15051 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15053 /* All floating point compares return CCFP if it is an equality
15054 comparison, and CCFPE otherwise. */
15055 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15057 switch (op)
15059 case EQ:
15060 case NE:
15061 case UNORDERED:
15062 case ORDERED:
15063 case UNLT:
15064 case UNLE:
15065 case UNGT:
15066 case UNGE:
15067 case UNEQ:
15068 case LTGT:
15069 return CCFPmode;
15071 case LT:
15072 case LE:
15073 case GT:
15074 case GE:
15075 return CCFPEmode;
15077 default:
15078 gcc_unreachable ();
15082 /* A compare with a shifted operand. Because of canonicalization, the
15083 comparison will have to be swapped when we emit the assembler. */
15084 if (GET_MODE (y) == SImode
15085 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15086 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15087 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15088 || GET_CODE (x) == ROTATERT))
15089 return CC_SWPmode;
15091 /* This operation is performed swapped, but since we only rely on the Z
15092 flag we don't need an additional mode. */
15093 if (GET_MODE (y) == SImode
15094 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15095 && GET_CODE (x) == NEG
15096 && (op == EQ || op == NE))
15097 return CC_Zmode;
15099 /* This is a special case that is used by combine to allow a
15100 comparison of a shifted byte load to be split into a zero-extend
15101 followed by a comparison of the shifted integer (only valid for
15102 equalities and unsigned inequalities). */
15103 if (GET_MODE (x) == SImode
15104 && GET_CODE (x) == ASHIFT
15105 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15106 && GET_CODE (XEXP (x, 0)) == SUBREG
15107 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15108 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15109 && (op == EQ || op == NE
15110 || op == GEU || op == GTU || op == LTU || op == LEU)
15111 && CONST_INT_P (y))
15112 return CC_Zmode;
15114 /* A construct for a conditional compare, if the false arm contains
15115 0, then both conditions must be true, otherwise either condition
15116 must be true. Not all conditions are possible, so CCmode is
15117 returned if it can't be done. */
15118 if (GET_CODE (x) == IF_THEN_ELSE
15119 && (XEXP (x, 2) == const0_rtx
15120 || XEXP (x, 2) == const1_rtx)
15121 && COMPARISON_P (XEXP (x, 0))
15122 && COMPARISON_P (XEXP (x, 1)))
15123 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15124 INTVAL (XEXP (x, 2)));
15126 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15127 if (GET_CODE (x) == AND
15128 && (op == EQ || op == NE)
15129 && COMPARISON_P (XEXP (x, 0))
15130 && COMPARISON_P (XEXP (x, 1)))
15131 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15132 DOM_CC_X_AND_Y);
15134 if (GET_CODE (x) == IOR
15135 && (op == EQ || op == NE)
15136 && COMPARISON_P (XEXP (x, 0))
15137 && COMPARISON_P (XEXP (x, 1)))
15138 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15139 DOM_CC_X_OR_Y);
15141 /* An operation (on Thumb) where we want to test for a single bit.
15142 This is done by shifting that bit up into the top bit of a
15143 scratch register; we can then branch on the sign bit. */
15144 if (TARGET_THUMB1
15145 && GET_MODE (x) == SImode
15146 && (op == EQ || op == NE)
15147 && GET_CODE (x) == ZERO_EXTRACT
15148 && XEXP (x, 1) == const1_rtx)
15149 return CC_Nmode;
15151 /* For an operation that sets the condition codes as a side-effect, the
15152 V flag is not set correctly, so we can only use comparisons where
15153 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15154 instead.) */
15155 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15156 if (GET_MODE (x) == SImode
15157 && y == const0_rtx
15158 && (op == EQ || op == NE || op == LT || op == GE)
15159 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15160 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15161 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15162 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15163 || GET_CODE (x) == LSHIFTRT
15164 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15165 || GET_CODE (x) == ROTATERT
15166 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15167 return CC_NOOVmode;
15169 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15170 return CC_Zmode;
15172 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15173 && GET_CODE (x) == PLUS
15174 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15175 return CC_Cmode;
15177 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15179 switch (op)
15181 case EQ:
15182 case NE:
15183 /* A DImode comparison against zero can be implemented by
15184 or'ing the two halves together. */
15185 if (y == const0_rtx)
15186 return CC_Zmode;
15188 /* We can do an equality test in three Thumb instructions. */
15189 if (!TARGET_32BIT)
15190 return CC_Zmode;
15192 /* FALLTHROUGH */
15194 case LTU:
15195 case LEU:
15196 case GTU:
15197 case GEU:
15198 /* DImode unsigned comparisons can be implemented by cmp +
15199 cmpeq without a scratch register. Not worth doing in
15200 Thumb-2. */
15201 if (TARGET_32BIT)
15202 return CC_CZmode;
15204 /* FALLTHROUGH */
15206 case LT:
15207 case LE:
15208 case GT:
15209 case GE:
15210 /* DImode signed and unsigned comparisons can be implemented
15211 by cmp + sbcs with a scratch register, but that does not
15212 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15213 gcc_assert (op != EQ && op != NE);
15214 return CC_NCVmode;
15216 default:
15217 gcc_unreachable ();
15221 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15222 return GET_MODE (x);
15224 return CCmode;
15227 /* X and Y are two things to compare using CODE. Emit the compare insn and
15228 return the rtx for the CC register in the proper mode. SCRATCH, if
15229 nonnull, is a scratch register that may be needed for DImode comparisons. */
15230 rtx
15231 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15233 machine_mode mode;
15234 rtx cc_reg;
15235 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15237 /* We might have X as a constant, Y as a register because of the predicates
15238 used for cmpdi. If so, force X to a register here. */
15239 if (dimode_comparison && !REG_P (x))
15240 x = force_reg (DImode, x);
15242 mode = SELECT_CC_MODE (code, x, y);
15243 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15245 if (dimode_comparison
15246 && mode != CC_CZmode)
15248 rtx clobber, set;
15250 /* To compare two non-zero values for equality, XOR them and
15251 then compare against zero. Not used for ARM mode; there
15252 CC_CZmode is cheaper. */
15253 if (mode == CC_Zmode && y != const0_rtx)
15255 gcc_assert (!reload_completed);
15256 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15257 y = const0_rtx;
15260 /* A scratch register is required. */
15261 if (reload_completed)
15262 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15263 else
15264 scratch = gen_rtx_SCRATCH (SImode);
15266 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15267 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15268 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15270 else
15271 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15273 return cc_reg;
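/* Editor's note: an illustrative summary, not part of the original source.
   For a DImode comparison that does not use CC_CZmode, the compare above
   is emitted inside a two-element PARALLEL that also clobbers an SImode
   scratch; before reload a SCRATCH rtx is used, after reload the caller
   must supply one.  On Thumb-1 an equality test against a nonzero value is
   first rewritten as (x ^ y) == 0 so that only the Z flag is needed.  */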
15276 /* Generate a sequence of insns that will generate the correct return
15277 address mask depending on the physical architecture that the program
15278 is running on. */
15279 rtx
15280 arm_gen_return_addr_mask (void)
15282 rtx reg = gen_reg_rtx (Pmode);
15284 emit_insn (gen_return_addr_mask (reg));
15285 return reg;
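/* Editor's note: descriptive comment added by the editor; it is not in the
   original source.  Handle loading a half-word from memory during reload by
   synthesizing it as two zero-extending byte loads that are merged with a
   shift and an IOR, honouring BYTES_BIG_ENDIAN.  OPERANDS[2] is a DImode
   scratch; whichever of its halves does not overlap OPERANDS[0] holds the
   intermediate byte.  */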
15288 void
15289 arm_reload_in_hi (rtx *operands)
15291 rtx ref = operands[1];
15292 rtx base, scratch;
15293 HOST_WIDE_INT offset = 0;
15295 if (GET_CODE (ref) == SUBREG)
15297 offset = SUBREG_BYTE (ref);
15298 ref = SUBREG_REG (ref);
15301 if (REG_P (ref))
15303 /* We have a pseudo which has been spilt onto the stack; there
15304 are two cases here: the first where there is a simple
15305 stack-slot replacement and a second where the stack-slot is
15306 out of range, or is used as a subreg. */
15307 if (reg_equiv_mem (REGNO (ref)))
15309 ref = reg_equiv_mem (REGNO (ref));
15310 base = find_replacement (&XEXP (ref, 0));
15312 else
15313 /* The slot is out of range, or was dressed up in a SUBREG. */
15314 base = reg_equiv_address (REGNO (ref));
15316 else
15317 base = find_replacement (&XEXP (ref, 0));
15319 /* Handle the case where the address is too complex to be offset by 1. */
15320 if (GET_CODE (base) == MINUS
15321 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15323 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15325 emit_set_insn (base_plus, base);
15326 base = base_plus;
15328 else if (GET_CODE (base) == PLUS)
15330 /* The addend must be CONST_INT, or we would have dealt with it above. */
15331 HOST_WIDE_INT hi, lo;
15333 offset += INTVAL (XEXP (base, 1));
15334 base = XEXP (base, 0);
15336 /* Rework the address into a legal sequence of insns. */
15337 /* Valid range for lo is -4095 -> 4095 */
15338 lo = (offset >= 0
15339 ? (offset & 0xfff)
15340 : -((-offset) & 0xfff));
15342 /* Corner case, if lo is the max offset then we would be out of range
15343 once we have added the additional 1 below, so bump the msb into the
15344 pre-loading insn(s). */
15345 if (lo == 4095)
15346 lo &= 0x7ff;
15348 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15349 ^ (HOST_WIDE_INT) 0x80000000)
15350 - (HOST_WIDE_INT) 0x80000000);
15352 gcc_assert (hi + lo == offset);
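/* Editor's note: an illustrative worked example, not part of the original
   source.  For OFFSET == 0x1234, LO is 0x234 and HI is 0x1000: HI is added
   to the base with addsi3 and the two byte loads then use offsets 0x234
   and 0x235.  For OFFSET == 4095 the masking above leaves LO == 2047 and
   HI == 2048, so LO + 1 stays within the +/-4095 byte-load range.  */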
15354 if (hi != 0)
15356 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15358 /* Get the base address; addsi3 knows how to handle constants
15359 that require more than one insn. */
15360 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15361 base = base_plus;
15362 offset = lo;
15366 /* Operands[2] may overlap operands[0] (though it won't overlap
15367 operands[1]); that is why we asked for a DImode reg -- so we can
15368 use the half that does not overlap. */
15369 if (REGNO (operands[2]) == REGNO (operands[0]))
15370 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15371 else
15372 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15374 emit_insn (gen_zero_extendqisi2 (scratch,
15375 gen_rtx_MEM (QImode,
15376 plus_constant (Pmode, base,
15377 offset))));
15378 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15379 gen_rtx_MEM (QImode,
15380 plus_constant (Pmode, base,
15381 offset + 1))));
15382 if (!BYTES_BIG_ENDIAN)
15383 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15384 gen_rtx_IOR (SImode,
15385 gen_rtx_ASHIFT
15386 (SImode,
15387 gen_rtx_SUBREG (SImode, operands[0], 0),
15388 GEN_INT (8)),
15389 scratch));
15390 else
15391 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15392 gen_rtx_IOR (SImode,
15393 gen_rtx_ASHIFT (SImode, scratch,
15394 GEN_INT (8)),
15395 gen_rtx_SUBREG (SImode, operands[0], 0)));
15398 /* Handle storing a half-word to memory during reload by synthesizing it as
15399 two byte stores. Take care not to clobber the input values until after we
15400 have moved them somewhere safe. This code assumes that if the DImode
15401 scratch in operands[2] overlaps either the input value or output address
15402 in some way, then that value must die in this insn (we absolutely need
15403 two scratch registers for some corner cases). */
15404 void
15405 arm_reload_out_hi (rtx *operands)
15407 rtx ref = operands[0];
15408 rtx outval = operands[1];
15409 rtx base, scratch;
15410 HOST_WIDE_INT offset = 0;
15412 if (GET_CODE (ref) == SUBREG)
15414 offset = SUBREG_BYTE (ref);
15415 ref = SUBREG_REG (ref);
15418 if (REG_P (ref))
15420 /* We have a pseudo which has been spilt onto the stack; there
15421 are two cases here: the first where there is a simple
15422 stack-slot replacement and a second where the stack-slot is
15423 out of range, or is used as a subreg. */
15424 if (reg_equiv_mem (REGNO (ref)))
15426 ref = reg_equiv_mem (REGNO (ref));
15427 base = find_replacement (&XEXP (ref, 0));
15429 else
15430 /* The slot is out of range, or was dressed up in a SUBREG. */
15431 base = reg_equiv_address (REGNO (ref));
15433 else
15434 base = find_replacement (&XEXP (ref, 0));
15436 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15438 /* Handle the case where the address is too complex to be offset by 1. */
15439 if (GET_CODE (base) == MINUS
15440 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15442 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15444 /* Be careful not to destroy OUTVAL. */
15445 if (reg_overlap_mentioned_p (base_plus, outval))
15447 /* Updating base_plus might destroy outval, see if we can
15448 swap the scratch and base_plus. */
15449 if (!reg_overlap_mentioned_p (scratch, outval))
15450 std::swap (scratch, base_plus);
15451 else
15453 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15455 /* Be conservative and copy OUTVAL into the scratch now,
15456 this should only be necessary if outval is a subreg
15457 of something larger than a word. */
15458 /* XXX Might this clobber base? I can't see how it can,
15459 since scratch is known to overlap with OUTVAL, and
15460 must be wider than a word. */
15461 emit_insn (gen_movhi (scratch_hi, outval));
15462 outval = scratch_hi;
15466 emit_set_insn (base_plus, base);
15467 base = base_plus;
15469 else if (GET_CODE (base) == PLUS)
15471 /* The addend must be CONST_INT, or we would have dealt with it above. */
15472 HOST_WIDE_INT hi, lo;
15474 offset += INTVAL (XEXP (base, 1));
15475 base = XEXP (base, 0);
15477 /* Rework the address into a legal sequence of insns. */
15478 /* Valid range for lo is -4095 -> 4095 */
15479 lo = (offset >= 0
15480 ? (offset & 0xfff)
15481 : -((-offset) & 0xfff));
15483 /* Corner case, if lo is the max offset then we would be out of range
15484 once we have added the additional 1 below, so bump the msb into the
15485 pre-loading insn(s). */
15486 if (lo == 4095)
15487 lo &= 0x7ff;
15489 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15490 ^ (HOST_WIDE_INT) 0x80000000)
15491 - (HOST_WIDE_INT) 0x80000000);
15493 gcc_assert (hi + lo == offset);
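/* Worked example (offsets are hypothetical): for offset = 0x1234 the
   split gives lo = 0x234 and hi = 0x1000; for offset = 0x1fff, lo
   would be 4095, so it is reduced to 0x7ff and hi becomes 0x1800,
   keeping both lo and lo + 1 within the +/-4095 range.  The
   XOR/subtract of 0x80000000 merely sign-extends bit 31 of
   (offset - lo) when HOST_WIDE_INT is wider than 32 bits; in every
   case hi + lo == offset, as the assertion above checks.  */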
15495 if (hi != 0)
15497 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15499 /* Be careful not to destroy OUTVAL. */
15500 if (reg_overlap_mentioned_p (base_plus, outval))
15502 /* Updating base_plus might destroy outval, see if we
15503 can swap the scratch and base_plus. */
15504 if (!reg_overlap_mentioned_p (scratch, outval))
15505 std::swap (scratch, base_plus);
15506 else
15508 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15510 /* Be conservative and copy outval into scratch now,
15511 this should only be necessary if outval is a
15512 subreg of something larger than a word. */
15513 /* XXX Might this clobber base? I can't see how it
15514 can, since scratch is known to overlap with
15515 outval. */
15516 emit_insn (gen_movhi (scratch_hi, outval));
15517 outval = scratch_hi;
15521 /* Get the base address; addsi3 knows how to handle constants
15522 that require more than one insn. */
15523 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15524 base = base_plus;
15525 offset = lo;
15529 if (BYTES_BIG_ENDIAN)
15531 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15532 plus_constant (Pmode, base,
15533 offset + 1)),
15534 gen_lowpart (QImode, outval)));
15535 emit_insn (gen_lshrsi3 (scratch,
15536 gen_rtx_SUBREG (SImode, outval, 0),
15537 GEN_INT (8)));
15538 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15539 offset)),
15540 gen_lowpart (QImode, scratch)));
15542 else
15544 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15545 offset)),
15546 gen_lowpart (QImode, outval)));
15547 emit_insn (gen_lshrsi3 (scratch,
15548 gen_rtx_SUBREG (SImode, outval, 0),
15549 GEN_INT (8)));
15550 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15551 plus_constant (Pmode, base,
15552 offset + 1)),
15553 gen_lowpart (QImode, scratch)));
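/* Roughly, the little-endian case above ends up emitting something like
   (register names are illustrative only):
	strb  rOUT, [rBASE, #offset]          @ low byte first
	lsr   rSCRATCH, rOUT, #8
	strb  rSCRATCH, [rBASE, #offset + 1]  @ then the high byte
   while the big-endian case stores the two bytes in the opposite
   order.  */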
15557 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15558 (padded to the size of a word) should be passed in a register. */
15560 static bool
15561 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15563 if (TARGET_AAPCS_BASED)
15564 return must_pass_in_stack_var_size (mode, type);
15565 else
15566 return must_pass_in_stack_var_size_or_pad (mode, type);
15570 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15571 Return true if an argument passed on the stack should be padded upwards,
15572 i.e. if the least-significant byte has useful data.
15573 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15574 aggregate types are placed in the lowest memory address. */
15576 bool
15577 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15579 if (!TARGET_AAPCS_BASED)
15580 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15582 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15583 return false;
15585 return true;
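/* For example (reading the conditions above): on a little-endian AAPCS
   target every stack argument is padded upward; on a big-endian AAPCS
   target a scalar such as a short is padded downward (false), while a
   small aggregate is still padded upward (true).  Non-AAPCS ABIs fall
   back to the generic rule.  */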
15589 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15590 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15591 register has useful data, and return the opposite if the most
15592 significant byte does. */
15594 bool
15595 arm_pad_reg_upward (machine_mode mode,
15596 tree type, int first ATTRIBUTE_UNUSED)
15598 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15600 /* For AAPCS, small aggregates, small fixed-point types,
15601 and small complex types are always padded upwards. */
15602 if (type)
15604 if ((AGGREGATE_TYPE_P (type)
15605 || TREE_CODE (type) == COMPLEX_TYPE
15606 || FIXED_POINT_TYPE_P (type))
15607 && int_size_in_bytes (type) <= 4)
15608 return true;
15610 else
15612 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15613 && GET_MODE_SIZE (mode) <= 4)
15614 return true;
15618 /* Otherwise, use default padding. */
15619 return !BYTES_BIG_ENDIAN;
15622 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15623 assuming that the address in the base register is word aligned. */
15624 bool
15625 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15627 HOST_WIDE_INT max_offset;
15629 /* Offset must be a multiple of 4 in Thumb mode. */
15630 if (TARGET_THUMB2 && ((offset & 3) != 0))
15631 return false;
15633 if (TARGET_THUMB2)
15634 max_offset = 1020;
15635 else if (TARGET_ARM)
15636 max_offset = 255;
15637 else
15638 return false;
15640 return ((offset <= max_offset) && (offset >= -max_offset));
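/* Usage sketch (offsets are hypothetical): when compiling for Thumb-2,
   offset_ok_for_ldrd_strd (1020) is true but 1022 is rejected because
   it is not a multiple of 4; when compiling for ARM, only offsets up to
   +/-255 are accepted, so 256 is rejected.  */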
15643 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15644 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15645 Assumes that the address in the base register RN is word aligned. Pattern
15646 guarantees that both memory accesses use the same base register,
15647 the offsets are constants within the range, and the gap between the offsets is 4.
15648 If reload is complete then check that the registers are legal. WBACK indicates whether
15649 address is updated. LOAD indicates whether memory access is load or store. */
15650 bool
15651 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15652 bool wback, bool load)
15654 unsigned int t, t2, n;
15656 if (!reload_completed)
15657 return true;
15659 if (!offset_ok_for_ldrd_strd (offset))
15660 return false;
15662 t = REGNO (rt);
15663 t2 = REGNO (rt2);
15664 n = REGNO (rn);
15666 if ((TARGET_THUMB2)
15667 && ((wback && (n == t || n == t2))
15668 || (t == SP_REGNUM)
15669 || (t == PC_REGNUM)
15670 || (t2 == SP_REGNUM)
15671 || (t2 == PC_REGNUM)
15672 || (!load && (n == PC_REGNUM))
15673 || (load && (t == t2))
15674 /* Triggers Cortex-M3 LDRD errata. */
15675 || (!wback && load && fix_cm3_ldrd && (n == t))))
15676 return false;
15678 if ((TARGET_ARM)
15679 && ((wback && (n == t || n == t2))
15680 || (t2 == PC_REGNUM)
15681 || (t % 2 != 0) /* First destination register is not even. */
15682 || (t2 != t + 1)
15683 /* PC can be used as base register (for offset addressing only),
15684 but it is deprecated. */
15685 || (n == PC_REGNUM)))
15686 return false;
15688 return true;
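/* Illustrative check (register numbers are hypothetical): before reload
   completes everything is accepted; afterwards, ARM mode only allows an
   even/odd consecutive pair, so ldrd r0, r1, [r2, #8] passes while a
   pair such as r1, r2 is rejected, and Thumb-2 instead rejects SP/PC
   operands, writeback forms that reuse the base register, and the
   Cortex-M3 LDRD erratum case.  */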
15691 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15692 operand MEM's address contains an immediate offset from the base
15693 register and has no side effects, in which case it sets BASE and
15694 OFFSET accordingly. */
15695 static bool
15696 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15698 rtx addr;
15700 gcc_assert (base != NULL && offset != NULL);
15702 /* TODO: Handle more general memory operand patterns, such as
15703 PRE_DEC and PRE_INC. */
15705 if (side_effects_p (mem))
15706 return false;
15708 /* Can't deal with subregs. */
15709 if (GET_CODE (mem) == SUBREG)
15710 return false;
15712 gcc_assert (MEM_P (mem));
15714 *offset = const0_rtx;
15716 addr = XEXP (mem, 0);
15718 /* If addr isn't valid for DImode, then we can't handle it. */
15719 if (!arm_legitimate_address_p (DImode, addr,
15720 reload_in_progress || reload_completed))
15721 return false;
15723 if (REG_P (addr))
15725 *base = addr;
15726 return true;
15728 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15730 *base = XEXP (addr, 0);
15731 *offset = XEXP (addr, 1);
15732 return (REG_P (*base) && CONST_INT_P (*offset));
15735 return false;
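/* For instance, a memory operand of the form
   (mem:SI (plus:SI (reg:SI r4) (const_int 8))) yields *base = r4 and
   *offset = 8, a plain (mem:SI (reg:SI r4)) yields an offset of 0, and
   autoincrement forms are rejected by the side_effects_p check above
   (register numbers are illustrative).  */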
15738 /* Called from a peephole2 to replace two word-size accesses with a
15739 single LDRD/STRD instruction. Returns true iff we can generate a
15740 new instruction sequence. That is, both accesses use the same base
15741 register and the gap between constant offsets is 4. This function
15742 may reorder its operands to match ldrd/strd RTL templates.
15743 OPERANDS are the operands found by the peephole matcher;
15744 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15745 corresponding memory operands. LOAD indicates whether the access
15746 is load or store. CONST_STORE indicates a store of constant
15747 integer values held in OPERANDS[4,5] and assumes that the pattern
15748 is of length 4 insns, for the purpose of checking dead registers.
15749 COMMUTE indicates that register operands may be reordered. */
15750 bool
15751 gen_operands_ldrd_strd (rtx *operands, bool load,
15752 bool const_store, bool commute)
15754 int nops = 2;
15755 HOST_WIDE_INT offsets[2], offset;
15756 rtx base = NULL_RTX;
15757 rtx cur_base, cur_offset, tmp;
15758 int i, gap;
15759 HARD_REG_SET regset;
15761 gcc_assert (!const_store || !load);
15762 /* Check that the memory references are immediate offsets from the
15763 same base register. Extract the base register, the destination
15764 registers, and the corresponding memory offsets. */
15765 for (i = 0; i < nops; i++)
15767 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15768 return false;
15770 if (i == 0)
15771 base = cur_base;
15772 else if (REGNO (base) != REGNO (cur_base))
15773 return false;
15775 offsets[i] = INTVAL (cur_offset);
15776 if (GET_CODE (operands[i]) == SUBREG)
15778 tmp = SUBREG_REG (operands[i]);
15779 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15780 operands[i] = tmp;
15784 /* Make sure there is no dependency between the individual loads. */
15785 if (load && REGNO (operands[0]) == REGNO (base))
15786 return false; /* RAW */
15788 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15789 return false; /* WAW */
15791 /* If the same input register is used in both stores
15792 when storing different constants, try to find a free register.
15793 For example, the code
15794 mov r0, 0
15795 str r0, [r2]
15796 mov r0, 1
15797 str r0, [r2, #4]
15798 can be transformed into
15799 mov r1, 0
15800 strd r1, r0, [r2]
15801 in Thumb mode assuming that r1 is free. */
15802 if (const_store
15803 && REGNO (operands[0]) == REGNO (operands[1])
15804 && INTVAL (operands[4]) != INTVAL (operands[5]))
15806 if (TARGET_THUMB2)
15808 CLEAR_HARD_REG_SET (regset);
15809 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15810 if (tmp == NULL_RTX)
15811 return false;
15813 /* Use the new register in the first load to ensure that
15814 if the original input register is not dead after peephole,
15815 then it will have the correct constant value. */
15816 operands[0] = tmp;
15818 else if (TARGET_ARM)
15820 return false;
15821 int regno = REGNO (operands[0]);
15822 if (!peep2_reg_dead_p (4, operands[0]))
15824 /* When the input register is even and is not dead after the
15825 pattern, it has to hold the second constant but we cannot
15826 form a legal STRD in ARM mode with this register as the second
15827 register. */
15828 if (regno % 2 == 0)
15829 return false;
15831 /* Is regno-1 free? */
15832 SET_HARD_REG_SET (regset);
15833 CLEAR_HARD_REG_BIT(regset, regno - 1);
15834 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15835 if (tmp == NULL_RTX)
15836 return false;
15838 operands[0] = tmp;
15840 else
15842 /* Find a DImode register. */
15843 CLEAR_HARD_REG_SET (regset);
15844 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15845 if (tmp != NULL_RTX)
15847 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15848 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15850 else
15852 /* Can we use the input register to form a DI register? */
15853 SET_HARD_REG_SET (regset);
15854 CLEAR_HARD_REG_BIT(regset,
15855 regno % 2 == 0 ? regno + 1 : regno - 1);
15856 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15857 if (tmp == NULL_RTX)
15858 return false;
15859 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15863 gcc_assert (operands[0] != NULL_RTX);
15864 gcc_assert (operands[1] != NULL_RTX);
15865 gcc_assert (REGNO (operands[0]) % 2 == 0);
15866 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15870 /* Make sure the instructions are ordered with lower memory access first. */
15871 if (offsets[0] > offsets[1])
15873 gap = offsets[0] - offsets[1];
15874 offset = offsets[1];
15876 /* Swap the instructions such that lower memory is accessed first. */
15877 std::swap (operands[0], operands[1]);
15878 std::swap (operands[2], operands[3]);
15879 if (const_store)
15880 std::swap (operands[4], operands[5]);
15882 else
15884 gap = offsets[1] - offsets[0];
15885 offset = offsets[0];
15888 /* Make sure accesses are to consecutive memory locations. */
15889 if (gap != 4)
15890 return false;
15892 /* Make sure we generate legal instructions. */
15893 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15894 false, load))
15895 return true;
15897 /* In Thumb state, where registers are almost unconstrained, there
15898 is little hope of fixing it. */
15899 if (TARGET_THUMB2)
15900 return false;
15902 if (load && commute)
15904 /* Try reordering registers. */
15905 std::swap (operands[0], operands[1]);
15906 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15907 false, load))
15908 return true;
15911 if (const_store)
15913 /* If input registers are dead after this pattern, they can be
15914 reordered or replaced by other registers that are free in the
15915 current pattern. */
15916 if (!peep2_reg_dead_p (4, operands[0])
15917 || !peep2_reg_dead_p (4, operands[1]))
15918 return false;
15920 /* Try to reorder the input registers. */
15921 /* For example, the code
15922 mov r0, 0
15923 mov r1, 1
15924 str r1, [r2]
15925 str r0, [r2, #4]
15926 can be transformed into
15927 mov r1, 0
15928 mov r0, 1
15929 strd r0, r1, [r2]
15931 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15932 false, false))
15934 std::swap (operands[0], operands[1]);
15935 return true;
15938 /* Try to find a free DI register. */
15939 CLEAR_HARD_REG_SET (regset);
15940 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15941 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15942 while (true)
15944 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15945 if (tmp == NULL_RTX)
15946 return false;
15948 /* DREG must be an even-numbered register in DImode.
15949 Split it into SI registers. */
15950 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15951 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15952 gcc_assert (operands[0] != NULL_RTX);
15953 gcc_assert (operands[1] != NULL_RTX);
15954 gcc_assert (REGNO (operands[0]) % 2 == 0);
15955 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15957 return (operands_ok_ldrd_strd (operands[0], operands[1],
15958 base, offset,
15959 false, load));
15963 return false;
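/* As a rough overall example of the peephole this helper serves
   (register numbers are hypothetical), a pair of word accesses such as
	ldr r0, [r4]
	ldr r1, [r4, #4]
   can be rewritten as
	ldrd r0, r1, [r4]
   once the checks above confirm a common base register, a gap of
   exactly 4 between the offsets, and a register pair that the target's
   LDRD/STRD encoding can accept.  */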
15969 /* Print a symbolic form of X to the debug file, F. */
15970 static void
15971 arm_print_value (FILE *f, rtx x)
15973 switch (GET_CODE (x))
15975 case CONST_INT:
15976 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15977 return;
15979 case CONST_DOUBLE:
15980 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15981 return;
15983 case CONST_VECTOR:
15985 int i;
15987 fprintf (f, "<");
15988 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15990 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15991 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15992 fputc (',', f);
15994 fprintf (f, ">");
15996 return;
15998 case CONST_STRING:
15999 fprintf (f, "\"%s\"", XSTR (x, 0));
16000 return;
16002 case SYMBOL_REF:
16003 fprintf (f, "`%s'", XSTR (x, 0));
16004 return;
16006 case LABEL_REF:
16007 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16008 return;
16010 case CONST:
16011 arm_print_value (f, XEXP (x, 0));
16012 return;
16014 case PLUS:
16015 arm_print_value (f, XEXP (x, 0));
16016 fprintf (f, "+");
16017 arm_print_value (f, XEXP (x, 1));
16018 return;
16020 case PC:
16021 fprintf (f, "pc");
16022 return;
16024 default:
16025 fprintf (f, "????");
16026 return;
16030 /* Routines for manipulation of the constant pool. */
16032 /* Arm instructions cannot load a large constant directly into a
16033 register; they have to come from a pc relative load. The constant
16034 must therefore be placed in the addressable range of the pc
16035 relative load. Depending on the precise pc relative load
16036 instruction the range is somewhere between 256 bytes and 4k. This
16037 means that we often have to dump a constant inside a function, and
16038 generate code to branch around it.
16040 It is important to minimize this, since the branches will slow
16041 things down and make the code larger.
16043 Normally we can hide the table after an existing unconditional
16044 branch so that there is no interruption of the flow, but in the
16045 worst case the code looks like this:
16047 ldr rn, L1
16049 b L2
16050 align
16051 L1: .long value
16055 ldr rn, L3
16057 b L4
16058 align
16059 L3: .long value
16063 We fix this by performing a scan after scheduling, which notices
16064 which instructions need to have their operands fetched from the
16065 constant table and builds the table.
16067 The algorithm starts by building a table of all the constants that
16068 need fixing up and all the natural barriers in the function (places
16069 where a constant table can be dropped without breaking the flow).
16070 For each fixup we note how far the pc-relative replacement will be
16071 able to reach and the offset of the instruction into the function.
16073 Having built the table we then group the fixes together to form
16074 tables that are as large as possible (subject to addressing
16075 constraints) and emit each table of constants after the last
16076 barrier that is within range of all the instructions in the group.
16077 If a group does not contain a barrier, then we forcibly create one
16078 by inserting a jump instruction into the flow. Once the table has
16079 been inserted, the insns are then modified to reference the
16080 relevant entry in the pool.
16082 Possible enhancements to the algorithm (not implemented) are:
16084 1) For some processors and object formats, there may be benefit in
16085 aligning the pools to the start of cache lines; this alignment
16086 would need to be taken into account when calculating addressability
16087 of a pool. */
16089 /* These typedefs are located at the start of this file, so that
16090 they can be used in the prototypes there. This comment is to
16091 remind readers of that fact so that the following structures
16092 can be understood more easily.
16094 typedef struct minipool_node Mnode;
16095 typedef struct minipool_fixup Mfix; */
16097 struct minipool_node
16099 /* Doubly linked chain of entries. */
16100 Mnode * next;
16101 Mnode * prev;
16102 /* The maximum offset into the code at which this entry can be placed. While
16103 pushing fixes for forward references, all entries are sorted in order
16104 of increasing max_address. */
16105 HOST_WIDE_INT max_address;
16106 /* Similarly for an entry inserted for a backwards ref. */
16107 HOST_WIDE_INT min_address;
16108 /* The number of fixes referencing this entry. This can become zero
16109 if we "unpush" an entry. In this case we ignore the entry when we
16110 come to emit the code. */
16111 int refcount;
16112 /* The offset from the start of the minipool. */
16113 HOST_WIDE_INT offset;
16114 /* The value in table. */
16115 rtx value;
16116 /* The mode of value. */
16117 machine_mode mode;
16118 /* The size of the value. With iWMMXt enabled
16119 sizes > 4 also imply an alignment of 8-bytes. */
16120 int fix_size;
16123 struct minipool_fixup
16125 Mfix * next;
16126 rtx_insn * insn;
16127 HOST_WIDE_INT address;
16128 rtx * loc;
16129 machine_mode mode;
16130 int fix_size;
16131 rtx value;
16132 Mnode * minipool;
16133 HOST_WIDE_INT forwards;
16134 HOST_WIDE_INT backwards;
16137 /* Fixes less than a word need padding out to a word boundary. */
16138 #define MINIPOOL_FIX_SIZE(mode) \
16139 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
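/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   are both 4, while MINIPOOL_FIX_SIZE (DImode) is 8.  */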
16141 static Mnode * minipool_vector_head;
16142 static Mnode * minipool_vector_tail;
16143 static rtx_code_label *minipool_vector_label;
16144 static int minipool_pad;
16146 /* The linked list of all minipool fixes required for this function. */
16147 Mfix * minipool_fix_head;
16148 Mfix * minipool_fix_tail;
16149 /* The fix entry for the current minipool, once it has been placed. */
16150 Mfix * minipool_barrier;
16152 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16153 #define JUMP_TABLES_IN_TEXT_SECTION 0
16154 #endif
16156 static HOST_WIDE_INT
16157 get_jump_table_size (rtx_jump_table_data *insn)
16159 /* ADDR_VECs only take room if read-only data goes into the text
16160 section. */
16161 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16163 rtx body = PATTERN (insn);
16164 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16165 HOST_WIDE_INT size;
16166 HOST_WIDE_INT modesize;
16168 modesize = GET_MODE_SIZE (GET_MODE (body));
16169 size = modesize * XVECLEN (body, elt);
16170 switch (modesize)
16172 case 1:
16173 /* Round up size of TBB table to a halfword boundary. */
16174 size = (size + 1) & ~(HOST_WIDE_INT)1;
16175 break;
16176 case 2:
16177 /* No padding necessary for TBH. */
16178 break;
16179 case 4:
16180 /* Add two bytes for alignment on Thumb. */
16181 if (TARGET_THUMB)
16182 size += 2;
16183 break;
16184 default:
16185 gcc_unreachable ();
16187 return size;
16190 return 0;
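/* Illustrative sizes (entry counts are hypothetical): a QImode (TBB)
   table with 5 entries occupies 6 bytes after rounding up to a halfword,
   an HImode (TBH) table with 5 entries occupies 10 bytes, and an SImode
   table with 5 entries occupies 22 bytes on Thumb because of the extra
   2 bytes of alignment.  Tables placed in a separate read-only data
   section contribute 0.  */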
16193 /* Return the maximum amount of padding that will be inserted before
16194 label LABEL. */
16196 static HOST_WIDE_INT
16197 get_label_padding (rtx label)
16199 HOST_WIDE_INT align, min_insn_size;
16201 align = 1 << label_to_alignment (label);
16202 min_insn_size = TARGET_THUMB ? 2 : 4;
16203 return align > min_insn_size ? align - min_insn_size : 0;
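/* For example, a label aligned to 8 bytes can be preceded by up to
   6 bytes of padding on Thumb (minimum insn size 2) and up to 4 bytes
   on ARM (minimum insn size 4); alignments no larger than the minimum
   insn size add nothing.  */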
16206 /* Move a minipool fix MP from its current location to before MAX_MP.
16207 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16208 constraints may need updating. */
16209 static Mnode *
16210 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16211 HOST_WIDE_INT max_address)
16213 /* The code below assumes these are different. */
16214 gcc_assert (mp != max_mp);
16216 if (max_mp == NULL)
16218 if (max_address < mp->max_address)
16219 mp->max_address = max_address;
16221 else
16223 if (max_address > max_mp->max_address - mp->fix_size)
16224 mp->max_address = max_mp->max_address - mp->fix_size;
16225 else
16226 mp->max_address = max_address;
16228 /* Unlink MP from its current position. Since max_mp is non-null,
16229 mp->prev must be non-null. */
16230 mp->prev->next = mp->next;
16231 if (mp->next != NULL)
16232 mp->next->prev = mp->prev;
16233 else
16234 minipool_vector_tail = mp->prev;
16236 /* Re-insert it before MAX_MP. */
16237 mp->next = max_mp;
16238 mp->prev = max_mp->prev;
16239 max_mp->prev = mp;
16241 if (mp->prev != NULL)
16242 mp->prev->next = mp;
16243 else
16244 minipool_vector_head = mp;
16247 /* Save the new entry. */
16248 max_mp = mp;
16250 /* Scan over the preceding entries and adjust their addresses as
16251 required. */
16252 while (mp->prev != NULL
16253 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16255 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16256 mp = mp->prev;
16259 return max_mp;
16262 /* Add a constant to the minipool for a forward reference. Returns the
16263 node added or NULL if the constant will not fit in this pool. */
16264 static Mnode *
16265 add_minipool_forward_ref (Mfix *fix)
16267 /* If set, max_mp is the first pool_entry that has a lower
16268 constraint than the one we are trying to add. */
16269 Mnode * max_mp = NULL;
16270 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16271 Mnode * mp;
16273 /* If the minipool starts before the end of FIX->INSN then this FIX
16274 can not be placed into the current pool. Furthermore, adding the
16275 new constant pool entry may cause the pool to start FIX_SIZE bytes
16276 earlier. */
16277 if (minipool_vector_head &&
16278 (fix->address + get_attr_length (fix->insn)
16279 >= minipool_vector_head->max_address - fix->fix_size))
16280 return NULL;
16282 /* Scan the pool to see if a constant with the same value has
16283 already been added. While we are doing this, also note the
16284 location where we must insert the constant if it doesn't already
16285 exist. */
16286 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16288 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16289 && fix->mode == mp->mode
16290 && (!LABEL_P (fix->value)
16291 || (CODE_LABEL_NUMBER (fix->value)
16292 == CODE_LABEL_NUMBER (mp->value)))
16293 && rtx_equal_p (fix->value, mp->value))
16295 /* More than one fix references this entry. */
16296 mp->refcount++;
16297 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16300 /* Note the insertion point if necessary. */
16301 if (max_mp == NULL
16302 && mp->max_address > max_address)
16303 max_mp = mp;
16305 /* If we are inserting an 8-byte aligned quantity and
16306 we have not already found an insertion point, then
16307 make sure that all such 8-byte aligned quantities are
16308 placed at the start of the pool. */
16309 if (ARM_DOUBLEWORD_ALIGN
16310 && max_mp == NULL
16311 && fix->fix_size >= 8
16312 && mp->fix_size < 8)
16314 max_mp = mp;
16315 max_address = mp->max_address;
16319 /* The value is not currently in the minipool, so we need to create
16320 a new entry for it. If MAX_MP is NULL, the entry will be put on
16321 the end of the list since the placement is less constrained than
16322 any existing entry. Otherwise, we insert the new fix before
16323 MAX_MP and, if necessary, adjust the constraints on the other
16324 entries. */
16325 mp = XNEW (Mnode);
16326 mp->fix_size = fix->fix_size;
16327 mp->mode = fix->mode;
16328 mp->value = fix->value;
16329 mp->refcount = 1;
16330 /* Not yet required for a backwards ref. */
16331 mp->min_address = -65536;
16333 if (max_mp == NULL)
16335 mp->max_address = max_address;
16336 mp->next = NULL;
16337 mp->prev = minipool_vector_tail;
16339 if (mp->prev == NULL)
16341 minipool_vector_head = mp;
16342 minipool_vector_label = gen_label_rtx ();
16344 else
16345 mp->prev->next = mp;
16347 minipool_vector_tail = mp;
16349 else
16351 if (max_address > max_mp->max_address - mp->fix_size)
16352 mp->max_address = max_mp->max_address - mp->fix_size;
16353 else
16354 mp->max_address = max_address;
16356 mp->next = max_mp;
16357 mp->prev = max_mp->prev;
16358 max_mp->prev = mp;
16359 if (mp->prev != NULL)
16360 mp->prev->next = mp;
16361 else
16362 minipool_vector_head = mp;
16365 /* Save the new entry. */
16366 max_mp = mp;
16368 /* Scan over the preceding entries and adjust their addresses as
16369 required. */
16370 while (mp->prev != NULL
16371 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16373 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16374 mp = mp->prev;
16377 return max_mp;
16380 static Mnode *
16381 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16382 HOST_WIDE_INT min_address)
16384 HOST_WIDE_INT offset;
16386 /* The code below assumes these are different. */
16387 gcc_assert (mp != min_mp);
16389 if (min_mp == NULL)
16391 if (min_address > mp->min_address)
16392 mp->min_address = min_address;
16394 else
16396 /* We will adjust this below if it is too loose. */
16397 mp->min_address = min_address;
16399 /* Unlink MP from its current position. Since min_mp is non-null,
16400 mp->next must be non-null. */
16401 mp->next->prev = mp->prev;
16402 if (mp->prev != NULL)
16403 mp->prev->next = mp->next;
16404 else
16405 minipool_vector_head = mp->next;
16407 /* Reinsert it after MIN_MP. */
16408 mp->prev = min_mp;
16409 mp->next = min_mp->next;
16410 min_mp->next = mp;
16411 if (mp->next != NULL)
16412 mp->next->prev = mp;
16413 else
16414 minipool_vector_tail = mp;
16417 min_mp = mp;
16419 offset = 0;
16420 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16422 mp->offset = offset;
16423 if (mp->refcount > 0)
16424 offset += mp->fix_size;
16426 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16427 mp->next->min_address = mp->min_address + mp->fix_size;
16430 return min_mp;
16433 /* Add a constant to the minipool for a backward reference. Returns the
16434 node added or NULL if the constant will not fit in this pool.
16436 Note that the code for insertion for a backwards reference can be
16437 somewhat confusing because the calculated offsets for each fix do
16438 not take into account the size of the pool (which is still under
16439 construction). */
16440 static Mnode *
16441 add_minipool_backward_ref (Mfix *fix)
16443 /* If set, min_mp is the last pool_entry that has a lower constraint
16444 than the one we are trying to add. */
16445 Mnode *min_mp = NULL;
16446 /* This can be negative, since it is only a constraint. */
16447 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16448 Mnode *mp;
16450 /* If we can't reach the current pool from this insn, or if we can't
16451 insert this entry at the end of the pool without pushing other
16452 fixes out of range, then we don't try. This ensures that we
16453 can't fail later on. */
16454 if (min_address >= minipool_barrier->address
16455 || (minipool_vector_tail->min_address + fix->fix_size
16456 >= minipool_barrier->address))
16457 return NULL;
16459 /* Scan the pool to see if a constant with the same value has
16460 already been added. While we are doing this, also note the
16461 location where we must insert the constant if it doesn't already
16462 exist. */
16463 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16465 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16466 && fix->mode == mp->mode
16467 && (!LABEL_P (fix->value)
16468 || (CODE_LABEL_NUMBER (fix->value)
16469 == CODE_LABEL_NUMBER (mp->value)))
16470 && rtx_equal_p (fix->value, mp->value)
16471 /* Check that there is enough slack to move this entry to the
16472 end of the table (this is conservative). */
16473 && (mp->max_address
16474 > (minipool_barrier->address
16475 + minipool_vector_tail->offset
16476 + minipool_vector_tail->fix_size)))
16478 mp->refcount++;
16479 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16482 if (min_mp != NULL)
16483 mp->min_address += fix->fix_size;
16484 else
16486 /* Note the insertion point if necessary. */
16487 if (mp->min_address < min_address)
16489 /* For now, we do not allow the insertion of 8-byte alignment
16490 requiring nodes anywhere but at the start of the pool. */
16491 if (ARM_DOUBLEWORD_ALIGN
16492 && fix->fix_size >= 8 && mp->fix_size < 8)
16493 return NULL;
16494 else
16495 min_mp = mp;
16497 else if (mp->max_address
16498 < minipool_barrier->address + mp->offset + fix->fix_size)
16500 /* Inserting before this entry would push the fix beyond
16501 its maximum address (which can happen if we have
16502 re-located a forwards fix); force the new fix to come
16503 after it. */
16504 if (ARM_DOUBLEWORD_ALIGN
16505 && fix->fix_size >= 8 && mp->fix_size < 8)
16506 return NULL;
16507 else
16509 min_mp = mp;
16510 min_address = mp->min_address + fix->fix_size;
16513 /* Do not insert a non-8-byte aligned quantity before 8-byte
16514 aligned quantities. */
16515 else if (ARM_DOUBLEWORD_ALIGN
16516 && fix->fix_size < 8
16517 && mp->fix_size >= 8)
16519 min_mp = mp;
16520 min_address = mp->min_address + fix->fix_size;
16525 /* We need to create a new entry. */
16526 mp = XNEW (Mnode);
16527 mp->fix_size = fix->fix_size;
16528 mp->mode = fix->mode;
16529 mp->value = fix->value;
16530 mp->refcount = 1;
16531 mp->max_address = minipool_barrier->address + 65536;
16533 mp->min_address = min_address;
16535 if (min_mp == NULL)
16537 mp->prev = NULL;
16538 mp->next = minipool_vector_head;
16540 if (mp->next == NULL)
16542 minipool_vector_tail = mp;
16543 minipool_vector_label = gen_label_rtx ();
16545 else
16546 mp->next->prev = mp;
16548 minipool_vector_head = mp;
16550 else
16552 mp->next = min_mp->next;
16553 mp->prev = min_mp;
16554 min_mp->next = mp;
16556 if (mp->next != NULL)
16557 mp->next->prev = mp;
16558 else
16559 minipool_vector_tail = mp;
16562 /* Save the new entry. */
16563 min_mp = mp;
16565 if (mp->prev)
16566 mp = mp->prev;
16567 else
16568 mp->offset = 0;
16570 /* Scan over the following entries and adjust their offsets. */
16571 while (mp->next != NULL)
16573 if (mp->next->min_address < mp->min_address + mp->fix_size)
16574 mp->next->min_address = mp->min_address + mp->fix_size;
16576 if (mp->refcount)
16577 mp->next->offset = mp->offset + mp->fix_size;
16578 else
16579 mp->next->offset = mp->offset;
16581 mp = mp->next;
16584 return min_mp;
16587 static void
16588 assign_minipool_offsets (Mfix *barrier)
16590 HOST_WIDE_INT offset = 0;
16591 Mnode *mp;
16593 minipool_barrier = barrier;
16595 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16597 mp->offset = offset;
16599 if (mp->refcount > 0)
16600 offset += mp->fix_size;
16604 /* Output the literal table */
16605 static void
16606 dump_minipool (rtx_insn *scan)
16608 Mnode * mp;
16609 Mnode * nmp;
16610 int align64 = 0;
16612 if (ARM_DOUBLEWORD_ALIGN)
16613 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16614 if (mp->refcount > 0 && mp->fix_size >= 8)
16616 align64 = 1;
16617 break;
16620 if (dump_file)
16621 fprintf (dump_file,
16622 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16623 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16625 scan = emit_label_after (gen_label_rtx (), scan);
16626 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16627 scan = emit_label_after (minipool_vector_label, scan);
16629 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16631 if (mp->refcount > 0)
16633 if (dump_file)
16635 fprintf (dump_file,
16636 ";; Offset %u, min %ld, max %ld ",
16637 (unsigned) mp->offset, (unsigned long) mp->min_address,
16638 (unsigned long) mp->max_address);
16639 arm_print_value (dump_file, mp->value);
16640 fputc ('\n', dump_file);
16643 switch (GET_MODE_SIZE (mp->mode))
16645 #ifdef HAVE_consttable_1
16646 case 1:
16647 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16648 break;
16650 #endif
16651 #ifdef HAVE_consttable_2
16652 case 2:
16653 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16654 break;
16656 #endif
16657 #ifdef HAVE_consttable_4
16658 case 4:
16659 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16660 break;
16662 #endif
16663 #ifdef HAVE_consttable_8
16664 case 8:
16665 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16666 break;
16668 #endif
16669 #ifdef HAVE_consttable_16
16670 case 16:
16671 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16672 break;
16674 #endif
16675 default:
16676 gcc_unreachable ();
16680 nmp = mp->next;
16681 free (mp);
16684 minipool_vector_head = minipool_vector_tail = NULL;
16685 scan = emit_insn_after (gen_consttable_end (), scan);
16686 scan = emit_barrier_after (scan);
16689 /* Return the cost of forcibly inserting a barrier after INSN. */
16690 static int
16691 arm_barrier_cost (rtx insn)
16693 /* Basing the location of the pool on the loop depth is preferable,
16694 but at the moment, the basic block information seems to be
16695 corrupt by this stage of the compilation. */
16696 int base_cost = 50;
16697 rtx next = next_nonnote_insn (insn);
16699 if (next != NULL && LABEL_P (next))
16700 base_cost -= 20;
16702 switch (GET_CODE (insn))
16704 case CODE_LABEL:
16705 /* It will always be better to place the table before the label, rather
16706 than after it. */
16707 return 50;
16709 case INSN:
16710 case CALL_INSN:
16711 return base_cost;
16713 case JUMP_INSN:
16714 return base_cost - 10;
16716 default:
16717 return base_cost + 10;
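/* By way of example, with these weights a jump insn that is immediately
   followed by a label costs 20 (50 - 20 - 10), an ordinary insn costs
   50, and a label itself always costs 50; lower costs mark better
   places to force a barrier.  */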
16721 /* Find the best place in the insn stream in the range
16722 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16723 Create the barrier by inserting a jump and add a new fix entry for
16724 it. */
16725 static Mfix *
16726 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16728 HOST_WIDE_INT count = 0;
16729 rtx_barrier *barrier;
16730 rtx_insn *from = fix->insn;
16731 /* The instruction after which we will insert the jump. */
16732 rtx_insn *selected = NULL;
16733 int selected_cost;
16734 /* The address at which the jump instruction will be placed. */
16735 HOST_WIDE_INT selected_address;
16736 Mfix * new_fix;
16737 HOST_WIDE_INT max_count = max_address - fix->address;
16738 rtx_code_label *label = gen_label_rtx ();
16740 selected_cost = arm_barrier_cost (from);
16741 selected_address = fix->address;
16743 while (from && count < max_count)
16745 rtx_jump_table_data *tmp;
16746 int new_cost;
16748 /* This code shouldn't have been called if there was a natural barrier
16749 within range. */
16750 gcc_assert (!BARRIER_P (from));
16752 /* Count the length of this insn. This must stay in sync with the
16753 code that pushes minipool fixes. */
16754 if (LABEL_P (from))
16755 count += get_label_padding (from);
16756 else
16757 count += get_attr_length (from);
16759 /* If there is a jump table, add its length. */
16760 if (tablejump_p (from, NULL, &tmp))
16762 count += get_jump_table_size (tmp);
16764 /* Jump tables aren't in a basic block, so base the cost on
16765 the dispatch insn. If we select this location, we will
16766 still put the pool after the table. */
16767 new_cost = arm_barrier_cost (from);
16769 if (count < max_count
16770 && (!selected || new_cost <= selected_cost))
16772 selected = tmp;
16773 selected_cost = new_cost;
16774 selected_address = fix->address + count;
16777 /* Continue after the dispatch table. */
16778 from = NEXT_INSN (tmp);
16779 continue;
16782 new_cost = arm_barrier_cost (from);
16784 if (count < max_count
16785 && (!selected || new_cost <= selected_cost))
16787 selected = from;
16788 selected_cost = new_cost;
16789 selected_address = fix->address + count;
16792 from = NEXT_INSN (from);
16795 /* Make sure that we found a place to insert the jump. */
16796 gcc_assert (selected);
16798 /* Make sure we do not split a call and its corresponding
16799 CALL_ARG_LOCATION note. */
16800 if (CALL_P (selected))
16802 rtx_insn *next = NEXT_INSN (selected);
16803 if (next && NOTE_P (next)
16804 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16805 selected = next;
16808 /* Create a new JUMP_INSN that branches around a barrier. */
16809 from = emit_jump_insn_after (gen_jump (label), selected);
16810 JUMP_LABEL (from) = label;
16811 barrier = emit_barrier_after (from);
16812 emit_label_after (label, barrier);
16814 /* Create a minipool barrier entry for the new barrier. */
16815 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16816 new_fix->insn = barrier;
16817 new_fix->address = selected_address;
16818 new_fix->next = fix->next;
16819 fix->next = new_fix;
16821 return new_fix;
16824 /* Record that there is a natural barrier in the insn stream at
16825 ADDRESS. */
16826 static void
16827 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16829 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16831 fix->insn = insn;
16832 fix->address = address;
16834 fix->next = NULL;
16835 if (minipool_fix_head != NULL)
16836 minipool_fix_tail->next = fix;
16837 else
16838 minipool_fix_head = fix;
16840 minipool_fix_tail = fix;
16843 /* Record INSN, which will need fixing up to load a value from the
16844 minipool. ADDRESS is the offset of the insn since the start of the
16845 function; LOC is a pointer to the part of the insn which requires
16846 fixing; VALUE is the constant that must be loaded, which is of type
16847 MODE. */
16848 static void
16849 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16850 machine_mode mode, rtx value)
16852 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16854 fix->insn = insn;
16855 fix->address = address;
16856 fix->loc = loc;
16857 fix->mode = mode;
16858 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16859 fix->value = value;
16860 fix->forwards = get_attr_pool_range (insn);
16861 fix->backwards = get_attr_neg_pool_range (insn);
16862 fix->minipool = NULL;
16864 /* If an insn doesn't have a range defined for it, then it isn't
16865 expecting to be reworked by this code. Better to stop now than
16866 to generate duff assembly code. */
16867 gcc_assert (fix->forwards || fix->backwards);
16869 /* If an entry requires 8-byte alignment then assume all constant pools
16870 require 4 bytes of padding. Trying to do this later on a per-pool
16871 basis is awkward because existing pool entries have to be modified. */
16872 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16873 minipool_pad = 4;
16875 if (dump_file)
16877 fprintf (dump_file,
16878 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16879 GET_MODE_NAME (mode),
16880 INSN_UID (insn), (unsigned long) address,
16881 -1 * (long)fix->backwards, (long)fix->forwards);
16882 arm_print_value (dump_file, fix->value);
16883 fprintf (dump_file, "\n");
16886 /* Add it to the chain of fixes. */
16887 fix->next = NULL;
16889 if (minipool_fix_head != NULL)
16890 minipool_fix_tail->next = fix;
16891 else
16892 minipool_fix_head = fix;
16894 minipool_fix_tail = fix;
16897 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16898 Returns the number of insns needed, or 99 if we always want to synthesize
16899 the value. */
16901 arm_max_const_double_inline_cost ()
16903 /* Let the value get synthesized to avoid the use of literal pools. */
16904 if (arm_disable_literal_pool)
16905 return 99;
16907 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16910 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16911 Returns the number of insns needed, or 99 if we don't know how to
16912 do it. */
16914 arm_const_double_inline_cost (rtx val)
16916 rtx lowpart, highpart;
16917 machine_mode mode;
16919 mode = GET_MODE (val);
16921 if (mode == VOIDmode)
16922 mode = DImode;
16924 gcc_assert (GET_MODE_SIZE (mode) == 8);
16926 lowpart = gen_lowpart (SImode, val);
16927 highpart = gen_highpart_mode (SImode, mode, val);
16929 gcc_assert (CONST_INT_P (lowpart));
16930 gcc_assert (CONST_INT_P (highpart));
16932 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16933 NULL_RTX, NULL_RTX, 0, 0)
16934 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16935 NULL_RTX, NULL_RTX, 0, 0));
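/* Hypothetical example: for the DImode constant 0x0000000100000001 both
   32-bit halves are the immediate 1, each costing a single insn, so the
   total of 2 is within arm_max_const_double_inline_cost () and the
   constant would be synthesized inline rather than placed in a literal
   pool.  */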
16938 /* Cost of loading a SImode constant. */
16939 static inline int
16940 arm_const_inline_cost (enum rtx_code code, rtx val)
16942 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16943 NULL_RTX, NULL_RTX, 1, 0);
16946 /* Return true if it is worthwhile to split a 64-bit constant into two
16947 32-bit operations. This is the case if optimizing for size, or
16948 if we have load delay slots, or if one 32-bit part can be done with
16949 a single data operation. */
16950 bool
16951 arm_const_double_by_parts (rtx val)
16953 machine_mode mode = GET_MODE (val);
16954 rtx part;
16956 if (optimize_size || arm_ld_sched)
16957 return true;
16959 if (mode == VOIDmode)
16960 mode = DImode;
16962 part = gen_highpart_mode (SImode, mode, val);
16964 gcc_assert (CONST_INT_P (part));
16966 if (const_ok_for_arm (INTVAL (part))
16967 || const_ok_for_arm (~INTVAL (part)))
16968 return true;
16970 part = gen_lowpart (SImode, val);
16972 gcc_assert (CONST_INT_P (part));
16974 if (const_ok_for_arm (INTVAL (part))
16975 || const_ok_for_arm (~INTVAL (part)))
16976 return true;
16978 return false;
16981 /* Return true if it is possible to inline both the high and low parts
16982 of a 64-bit constant into 32-bit data processing instructions. */
16983 bool
16984 arm_const_double_by_immediates (rtx val)
16986 machine_mode mode = GET_MODE (val);
16987 rtx part;
16989 if (mode == VOIDmode)
16990 mode = DImode;
16992 part = gen_highpart_mode (SImode, mode, val);
16994 gcc_assert (CONST_INT_P (part));
16996 if (!const_ok_for_arm (INTVAL (part)))
16997 return false;
16999 part = gen_lowpart (SImode, val);
17001 gcc_assert (CONST_INT_P (part));
17003 if (!const_ok_for_arm (INTVAL (part)))
17004 return false;
17006 return true;
17009 /* Scan INSN and note any of its operands that need fixing.
17010 If DO_PUSHES is false we do not actually push any of the fixups
17011 needed. */
17012 static void
17013 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17015 int opno;
17017 extract_constrain_insn (insn);
17019 if (recog_data.n_alternatives == 0)
17020 return;
17022 /* Fill in recog_op_alt with information about the constraints of
17023 this insn. */
17024 preprocess_constraints (insn);
17026 const operand_alternative *op_alt = which_op_alt ();
17027 for (opno = 0; opno < recog_data.n_operands; opno++)
17029 /* Things we need to fix can only occur in inputs. */
17030 if (recog_data.operand_type[opno] != OP_IN)
17031 continue;
17033 /* If this alternative is a memory reference, then any mention
17034 of constants in this alternative is really to fool reload
17035 into allowing us to accept one there. We need to fix them up
17036 now so that we output the right code. */
17037 if (op_alt[opno].memory_ok)
17039 rtx op = recog_data.operand[opno];
17041 if (CONSTANT_P (op))
17043 if (do_pushes)
17044 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17045 recog_data.operand_mode[opno], op);
17047 else if (MEM_P (op)
17048 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17049 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17051 if (do_pushes)
17053 rtx cop = avoid_constant_pool_reference (op);
17055 /* Casting the address of something to a mode narrower
17056 than a word can cause avoid_constant_pool_reference()
17057 to return the pool reference itself. That's no good to
17058 us here. Let's just hope that we can use the
17059 constant pool value directly. */
17060 if (op == cop)
17061 cop = get_pool_constant (XEXP (op, 0));
17063 push_minipool_fix (insn, address,
17064 recog_data.operand_loc[opno],
17065 recog_data.operand_mode[opno], cop);
17072 return;
17075 /* Rewrite move insn into subtract of 0 if the condition codes will
17076 be useful in next conditional jump insn. */
17078 static void
17079 thumb1_reorg (void)
17081 basic_block bb;
17083 FOR_EACH_BB_FN (bb, cfun)
17085 rtx dest, src;
17086 rtx pat, op0, set = NULL;
17087 rtx_insn *prev, *insn = BB_END (bb);
17088 bool insn_clobbered = false;
17090 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17091 insn = PREV_INSN (insn);
17093 /* Find the last cbranchsi4_insn in basic block BB. */
17094 if (insn == BB_HEAD (bb)
17095 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17096 continue;
17098 /* Get the register with which we are comparing. */
17099 pat = PATTERN (insn);
17100 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17102 /* Find the first flag setting insn before INSN in basic block BB. */
17103 gcc_assert (insn != BB_HEAD (bb));
17104 for (prev = PREV_INSN (insn);
17105 (!insn_clobbered
17106 && prev != BB_HEAD (bb)
17107 && (NOTE_P (prev)
17108 || DEBUG_INSN_P (prev)
17109 || ((set = single_set (prev)) != NULL
17110 && get_attr_conds (prev) == CONDS_NOCOND)));
17111 prev = PREV_INSN (prev))
17113 if (reg_set_p (op0, prev))
17114 insn_clobbered = true;
17117 /* Skip if op0 is clobbered by insn other than prev. */
17118 if (insn_clobbered)
17119 continue;
17121 if (!set)
17122 continue;
17124 dest = SET_DEST (set);
17125 src = SET_SRC (set);
17126 if (!low_register_operand (dest, SImode)
17127 || !low_register_operand (src, SImode))
17128 continue;
17130 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17131 in INSN. Both src and dest of the move insn are checked. */
17132 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17134 dest = copy_rtx (dest);
17135 src = copy_rtx (src);
17136 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17137 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17138 INSN_CODE (prev) = -1;
17139 /* Set test register in INSN to dest. */
17140 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17141 INSN_CODE (insn) = -1;
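/* The intended effect, roughly (register names are hypothetical), is to
   turn a sequence such as
	movs  r3, r2
	cmp   r3, #0
	beq   .L1
   into
	subs  r3, r2, #0
	beq   .L1
   so that the compare embedded in the cbranch pattern can be dropped,
   the flags having already been set by the subtract.  */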
17146 /* Convert instructions to their cc-clobbering variant if possible, since
17147 that allows us to use smaller encodings. */
17149 static void
17150 thumb2_reorg (void)
17152 basic_block bb;
17153 regset_head live;
17155 INIT_REG_SET (&live);
17157 /* We are freeing block_for_insn in the toplev to keep compatibility
17158 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17159 compute_bb_for_insn ();
17160 df_analyze ();
17162 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17164 FOR_EACH_BB_FN (bb, cfun)
17166 if (current_tune->disparage_flag_setting_t16_encodings
17167 && optimize_bb_for_speed_p (bb))
17168 continue;
17170 rtx_insn *insn;
17171 Convert_Action action = SKIP;
17172 Convert_Action action_for_partial_flag_setting
17173 = (current_tune->disparage_partial_flag_setting_t16_encodings
17174 && optimize_bb_for_speed_p (bb))
17175 ? SKIP : CONV;
17177 COPY_REG_SET (&live, DF_LR_OUT (bb));
17178 df_simulate_initialize_backwards (bb, &live);
17179 FOR_BB_INSNS_REVERSE (bb, insn)
17181 if (NONJUMP_INSN_P (insn)
17182 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17183 && GET_CODE (PATTERN (insn)) == SET)
17185 action = SKIP;
17186 rtx pat = PATTERN (insn);
17187 rtx dst = XEXP (pat, 0);
17188 rtx src = XEXP (pat, 1);
17189 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17191 if (!OBJECT_P (src))
17192 op0 = XEXP (src, 0);
17194 if (BINARY_P (src))
17195 op1 = XEXP (src, 1);
17197 if (low_register_operand (dst, SImode))
17199 switch (GET_CODE (src))
17201 case PLUS:
17202 /* Adding two registers and storing the result
17203 in the first source is already a 16-bit
17204 operation. */
17205 if (rtx_equal_p (dst, op0)
17206 && register_operand (op1, SImode))
17207 break;
17209 if (low_register_operand (op0, SImode))
17211 /* ADDS <Rd>,<Rn>,<Rm> */
17212 if (low_register_operand (op1, SImode))
17213 action = CONV;
17214 /* ADDS <Rdn>,#<imm8> */
17215 /* SUBS <Rdn>,#<imm8> */
17216 else if (rtx_equal_p (dst, op0)
17217 && CONST_INT_P (op1)
17218 && IN_RANGE (INTVAL (op1), -255, 255))
17219 action = CONV;
17220 /* ADDS <Rd>,<Rn>,#<imm3> */
17221 /* SUBS <Rd>,<Rn>,#<imm3> */
17222 else if (CONST_INT_P (op1)
17223 && IN_RANGE (INTVAL (op1), -7, 7))
17224 action = CONV;
17226 /* ADCS <Rd>, <Rn> */
17227 else if (GET_CODE (XEXP (src, 0)) == PLUS
17228 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17229 && low_register_operand (XEXP (XEXP (src, 0), 1),
17230 SImode)
17231 && COMPARISON_P (op1)
17232 && cc_register (XEXP (op1, 0), VOIDmode)
17233 && maybe_get_arm_condition_code (op1) == ARM_CS
17234 && XEXP (op1, 1) == const0_rtx)
17235 action = CONV;
17236 break;
17238 case MINUS:
17239 /* RSBS <Rd>,<Rn>,#0
17240 Not handled here: see NEG below. */
17241 /* SUBS <Rd>,<Rn>,#<imm3>
17242 SUBS <Rdn>,#<imm8>
17243 Not handled here: see PLUS above. */
17244 /* SUBS <Rd>,<Rn>,<Rm> */
17245 if (low_register_operand (op0, SImode)
17246 && low_register_operand (op1, SImode))
17247 action = CONV;
17248 break;
17250 case MULT:
17251 /* MULS <Rdm>,<Rn>,<Rdm>
17252 As an exception to the rule, this is only used
17253 when optimizing for size since MULS is slow on all
17254 known implementations. We do not even want to use
17255 MULS in cold code, if optimizing for speed, so we
17256 test the global flag here. */
17257 if (!optimize_size)
17258 break;
17259 /* else fall through. */
17260 case AND:
17261 case IOR:
17262 case XOR:
17263 /* ANDS <Rdn>,<Rm> */
17264 if (rtx_equal_p (dst, op0)
17265 && low_register_operand (op1, SImode))
17266 action = action_for_partial_flag_setting;
17267 else if (rtx_equal_p (dst, op1)
17268 && low_register_operand (op0, SImode))
17269 action = action_for_partial_flag_setting == SKIP
17270 ? SKIP : SWAP_CONV;
17271 break;
17273 case ASHIFTRT:
17274 case ASHIFT:
17275 case LSHIFTRT:
17276 /* ASRS <Rdn>,<Rm> */
17277 /* LSRS <Rdn>,<Rm> */
17278 /* LSLS <Rdn>,<Rm> */
17279 if (rtx_equal_p (dst, op0)
17280 && low_register_operand (op1, SImode))
17281 action = action_for_partial_flag_setting;
17282 /* ASRS <Rd>,<Rm>,#<imm5> */
17283 /* LSRS <Rd>,<Rm>,#<imm5> */
17284 /* LSLS <Rd>,<Rm>,#<imm5> */
17285 else if (low_register_operand (op0, SImode)
17286 && CONST_INT_P (op1)
17287 && IN_RANGE (INTVAL (op1), 0, 31))
17288 action = action_for_partial_flag_setting;
17289 break;
17291 case ROTATERT:
17292 /* RORS <Rdn>,<Rm> */
17293 if (rtx_equal_p (dst, op0)
17294 && low_register_operand (op1, SImode))
17295 action = action_for_partial_flag_setting;
17296 break;
17298 case NOT:
17299 /* MVNS <Rd>,<Rm> */
17300 if (low_register_operand (op0, SImode))
17301 action = action_for_partial_flag_setting;
17302 break;
17304 case NEG:
17305 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17306 if (low_register_operand (op0, SImode))
17307 action = CONV;
17308 break;
17310 case CONST_INT:
17311 /* MOVS <Rd>,#<imm8> */
17312 if (CONST_INT_P (src)
17313 && IN_RANGE (INTVAL (src), 0, 255))
17314 action = action_for_partial_flag_setting;
17315 break;
17317 case REG:
17318 /* MOVS and MOV<c> with registers have different
17319 encodings, so are not relevant here. */
17320 break;
17322 default:
17323 break;
17327 if (action != SKIP)
17329 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17330 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17331 rtvec vec;
17333 if (action == SWAP_CONV)
17335 src = copy_rtx (src);
17336 XEXP (src, 0) = op1;
17337 XEXP (src, 1) = op0;
17338 pat = gen_rtx_SET (VOIDmode, dst, src);
17339 vec = gen_rtvec (2, pat, clobber);
17341 else /* action == CONV */
17342 vec = gen_rtvec (2, pat, clobber);
17344 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17345 INSN_CODE (insn) = -1;
17349 if (NONDEBUG_INSN_P (insn))
17350 df_simulate_one_insn_backwards (bb, insn, &live);
17354 CLEAR_REG_SET (&live);
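/* A concrete flavour of the conversion above (registers are
   hypothetical): when the condition flags are dead, a three-register
	add  r0, r1, r2      @ would need a 32-bit add.w encoding
   is rewritten with an added CC clobber so it can be emitted as
	adds r0, r1, r2      @ 16-bit flag-setting encoding
   which is also why the transformation is skipped in blocks where the
   tuning disparages flag-setting 16-bit encodings.  */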
17357 /* GCC puts the pool in the wrong place for ARM, since we can only
17358 load addresses a limited distance around the pc. We do some
17359 special munging to move the constant pool values to the correct
17360 point in the code. */
17361 static void
17362 arm_reorg (void)
17364 rtx_insn *insn;
17365 HOST_WIDE_INT address = 0;
17366 Mfix * fix;
17368 if (TARGET_THUMB1)
17369 thumb1_reorg ();
17370 else if (TARGET_THUMB2)
17371 thumb2_reorg ();
17373 /* Ensure all insns that must be split have been split at this point.
17374 Otherwise, the pool placement code below may compute incorrect
17375 insn lengths. Note that when optimizing, all insns have already
17376 been split at this point. */
17377 if (!optimize)
17378 split_all_insns_noflow ();
17380 minipool_fix_head = minipool_fix_tail = NULL;
17382 /* The first insn must always be a note, or the code below won't
17383 scan it properly. */
17384 insn = get_insns ();
17385 gcc_assert (NOTE_P (insn));
17386 minipool_pad = 0;
17388 /* Scan all the insns and record the operands that will need fixing. */
17389 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17391 if (BARRIER_P (insn))
17392 push_minipool_barrier (insn, address);
17393 else if (INSN_P (insn))
17395 rtx_jump_table_data *table;
17397 note_invalid_constants (insn, address, true);
17398 address += get_attr_length (insn);
17400 /* If the insn is a vector jump, add the size of the table
17401 and skip the table. */
17402 if (tablejump_p (insn, NULL, &table))
17404 address += get_jump_table_size (table);
17405 insn = table;
17408 else if (LABEL_P (insn))
17409 /* Add the worst-case padding due to alignment. We don't add
17410 the _current_ padding because the minipool insertions
17411 themselves might change it. */
17412 address += get_label_padding (insn);
17415 fix = minipool_fix_head;
17417 /* Now scan the fixups and perform the required changes. */
17418 while (fix)
17420 Mfix * ftmp;
17421 Mfix * fdel;
17422 Mfix * last_added_fix;
17423 Mfix * last_barrier = NULL;
17424 Mfix * this_fix;
17426 /* Skip any further barriers before the next fix. */
17427 while (fix && BARRIER_P (fix->insn))
17428 fix = fix->next;
17430 /* No more fixes. */
17431 if (fix == NULL)
17432 break;
17434 last_added_fix = NULL;
17436 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17438 if (BARRIER_P (ftmp->insn))
17440 if (ftmp->address >= minipool_vector_head->max_address)
17441 break;
17443 last_barrier = ftmp;
17445 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17446 break;
17448 last_added_fix = ftmp; /* Keep track of the last fix added. */
17451 /* If we found a barrier, drop back to that; any fixes that we
17452 could have reached but come after the barrier will now go in
17453 the next mini-pool. */
17454 if (last_barrier != NULL)
17456 /* Reduce the refcount for those fixes that won't go into this
17457 pool after all. */
17458 for (fdel = last_barrier->next;
17459 fdel && fdel != ftmp;
17460 fdel = fdel->next)
17462 fdel->minipool->refcount--;
17463 fdel->minipool = NULL;
17466 ftmp = last_barrier;
17468 else
17470 /* ftmp is the first fix that we can't fit into this pool, and
17471 there are no natural barriers that we could use. Insert a
17472 new barrier in the code somewhere between the previous
17473 fix and this one, and arrange to jump around it. */
17474 HOST_WIDE_INT max_address;
17476 /* The last item on the list of fixes must be a barrier, so
17477 we can never run off the end of the list of fixes without
17478 last_barrier being set. */
17479 gcc_assert (ftmp);
17481 max_address = minipool_vector_head->max_address;
17482 /* Check that there isn't another fix that is in range that
17483 we couldn't fit into this pool because the pool was
17484 already too large: we need to put the pool before such an
17485 instruction. The pool itself may come just after the
17486 fix because create_fix_barrier also allows space for a
17487 jump instruction. */
17488 if (ftmp->address < max_address)
17489 max_address = ftmp->address + 1;
17491 last_barrier = create_fix_barrier (last_added_fix, max_address);
17494 assign_minipool_offsets (last_barrier);
17496 while (ftmp)
17498 if (!BARRIER_P (ftmp->insn)
17499 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17500 == NULL))
17501 break;
17503 ftmp = ftmp->next;
17506 /* Scan over the fixes we have identified for this pool, fixing them
17507 up and adding the constants to the pool itself. */
17508 for (this_fix = fix; this_fix && ftmp != this_fix;
17509 this_fix = this_fix->next)
17510 if (!BARRIER_P (this_fix->insn))
17512 rtx addr
17513 = plus_constant (Pmode,
17514 gen_rtx_LABEL_REF (VOIDmode,
17515 minipool_vector_label),
17516 this_fix->minipool->offset);
17517 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17520 dump_minipool (last_barrier->insn);
17521 fix = ftmp;
17524 /* From now on we must synthesize any constants that we can't handle
17525 directly. This can happen if the RTL gets split during final
17526 instruction generation. */
17527 cfun->machine->after_arm_reorg = 1;
17529 /* Free the minipool memory. */
17530 obstack_free (&minipool_obstack, minipool_startobj);
17533 /* Routines to output assembly language. */
17535 /* Return string representation of passed in real value. */
17536 static const char *
17537 fp_const_from_val (REAL_VALUE_TYPE *r)
17539 if (!fp_consts_inited)
17540 init_fp_table ();
17542 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17543 return "0";
17546 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17547 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17548 insn is in the list, and UPDATE is true iff the list contains an
17549 explicit update of the base register. */
17550 void
17551 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17552 bool update)
17554 int i;
17555 char pattern[100];
17556 int offset;
17557 const char *conditional;
17558 int num_saves = XVECLEN (operands[0], 0);
17559 unsigned int regno;
17560 unsigned int regno_base = REGNO (operands[1]);
17562 offset = 0;
17563 offset += update ? 1 : 0;
17564 offset += return_pc ? 1 : 0;
17566 /* Is the base register in the list? */
17567 for (i = offset; i < num_saves; i++)
17569 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17570 /* If SP is in the list, then the base register must be SP. */
17571 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17572 /* If base register is in the list, there must be no explicit update. */
17573 if (regno == regno_base)
17574 gcc_assert (!update);
17577 conditional = reverse ? "%?%D0" : "%?%d0";
17578 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17580 /* Output pop (not stmfd) because it has a shorter encoding. */
17581 gcc_assert (update);
17582 sprintf (pattern, "pop%s\t{", conditional);
17584 else
17586 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17587 It's just a convention; their semantics are identical. */
17588 if (regno_base == SP_REGNUM)
17589 sprintf (pattern, "ldm%sfd\t", conditional);
17590 else if (TARGET_UNIFIED_ASM)
17591 sprintf (pattern, "ldmia%s\t", conditional);
17592 else
17593 sprintf (pattern, "ldm%sia\t", conditional);
17595 strcat (pattern, reg_names[regno_base]);
17596 if (update)
17597 strcat (pattern, "!, {");
17598 else
17599 strcat (pattern, ", {");
17602 /* Output the first destination register. */
17603 strcat (pattern,
17604 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17606 /* Output the rest of the destination registers. */
17607 for (i = offset + 1; i < num_saves; i++)
17609 strcat (pattern, ", ");
17610 strcat (pattern,
17611 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17614 strcat (pattern, "}");
17616 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17617 strcat (pattern, "^");
17619 output_asm_insn (pattern, &cond);
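/* For illustration only (hypothetical operand values): with SP as the
   base register, writeback and a register list of {r4, r5, pc}, the
   code above prints

       pop     {r4, r5, pc}

   under unified syntax, while a non-writeback load based on r7 comes
   out as "ldmia  r7, {r4, r5, r6}"; pre-unified syntax uses ldmfd when
   the base is SP.  */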
17623 /* Output the assembly for a store multiple. */
17625 const char *
17626 vfp_output_vstmd (rtx * operands)
17628 char pattern[100];
17629 int p;
17630 int base;
17631 int i;
17632 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17633 ? XEXP (operands[0], 0)
17634 : XEXP (XEXP (operands[0], 0), 0);
17635 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17637 if (push_p)
17638 strcpy (pattern, "vpush%?.64\t{%P1");
17639 else
17640 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17642 p = strlen (pattern);
17644 gcc_assert (REG_P (operands[1]));
17646 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17647 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17649 p += sprintf (&pattern[p], ", d%d", base + i);
17651 strcpy (&pattern[p], "}");
17653 output_asm_insn (pattern, operands);
17654 return "";
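/* Illustrative output (assumed operands, not from a real build): for a
   three-register store of d8..d10 with the stack pointer as the base,
   the pattern built above is

       vpush.64        {d8, d9, d10}

   and for any other base register rN it becomes
   "vstmdb.64  rN!, {d8, d9, d10}".  */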
17658 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17659 number of bytes pushed. */
17661 static int
17662 vfp_emit_fstmd (int base_reg, int count)
17664 rtx par;
17665 rtx dwarf;
17666 rtx tmp, reg;
17667 int i;
17669 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17670 register pairs are stored by a store multiple insn. We avoid this
17671 by pushing an extra pair. */
17672 if (count == 2 && !arm_arch6)
17674 if (base_reg == LAST_VFP_REGNUM - 3)
17675 base_reg -= 2;
17676 count++;
17679 /* FSTMD may not store more than 16 doubleword registers at once. Split
17680 larger stores into multiple parts (up to a maximum of two, in
17681 practice). */
17682 if (count > 16)
17684 int saved;
17685 /* NOTE: base_reg is an internal register number, so each D register
17686 counts as 2. */
17687 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17688 saved += vfp_emit_fstmd (base_reg, 16);
17689 return saved;
17692 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17693 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17695 reg = gen_rtx_REG (DFmode, base_reg);
17696 base_reg += 2;
17698 XVECEXP (par, 0, 0)
17699 = gen_rtx_SET (VOIDmode,
17700 gen_frame_mem
17701 (BLKmode,
17702 gen_rtx_PRE_MODIFY (Pmode,
17703 stack_pointer_rtx,
17704 plus_constant
17705 (Pmode, stack_pointer_rtx,
17706 - (count * 8)))
17708 gen_rtx_UNSPEC (BLKmode,
17709 gen_rtvec (1, reg),
17710 UNSPEC_PUSH_MULT));
17712 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17713 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17714 RTX_FRAME_RELATED_P (tmp) = 1;
17715 XVECEXP (dwarf, 0, 0) = tmp;
17717 tmp = gen_rtx_SET (VOIDmode,
17718 gen_frame_mem (DFmode, stack_pointer_rtx),
17719 reg);
17720 RTX_FRAME_RELATED_P (tmp) = 1;
17721 XVECEXP (dwarf, 0, 1) = tmp;
17723 for (i = 1; i < count; i++)
17725 reg = gen_rtx_REG (DFmode, base_reg);
17726 base_reg += 2;
17727 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17729 tmp = gen_rtx_SET (VOIDmode,
17730 gen_frame_mem (DFmode,
17731 plus_constant (Pmode,
17732 stack_pointer_rtx,
17733 i * 8)),
17734 reg);
17735 RTX_FRAME_RELATED_P (tmp) = 1;
17736 XVECEXP (dwarf, 0, i + 1) = tmp;
17739 par = emit_insn (par);
17740 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17741 RTX_FRAME_RELATED_P (par) = 1;
17743 return count * 8;
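/* A minimal usage sketch (hypothetical caller, values assumed): saving
   d8..d10 from the prologue would look roughly like

       int saved = vfp_emit_fstmd (FIRST_VFP_REGNUM + 16, 3);

   which emits one multi-register store of three doubleword registers
   (printed later as vpush/vstmdb) and returns 24.  On a pre-v6 core a
   request for exactly two registers is silently widened to three by
   the VFPr1 workaround above, so count == 2 also yields 24.  */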
17746 /* Emit a call instruction with pattern PAT. ADDR is the address of
17747 the call target. */
17749 void
17750 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17752 rtx insn;
17754 insn = emit_call_insn (pat);
17756 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17757 If the call might use such an entry, add a use of the PIC register
17758 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17759 if (TARGET_VXWORKS_RTP
17760 && flag_pic
17761 && !sibcall
17762 && GET_CODE (addr) == SYMBOL_REF
17763 && (SYMBOL_REF_DECL (addr)
17764 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17765 : !SYMBOL_REF_LOCAL_P (addr)))
17767 require_pic_register ();
17768 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17771 if (TARGET_AAPCS_BASED)
17773 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17774 linker. We need to add an IP clobber to allow setting
17775 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17776 is not needed since it's a fixed register. */
17777 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17778 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17782 /* Output a 'call' insn. */
17783 const char *
17784 output_call (rtx *operands)
17786 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17788 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17789 if (REGNO (operands[0]) == LR_REGNUM)
17791 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17792 output_asm_insn ("mov%?\t%0, %|lr", operands);
17795 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17797 if (TARGET_INTERWORK || arm_arch4t)
17798 output_asm_insn ("bx%?\t%0", operands);
17799 else
17800 output_asm_insn ("mov%?\t%|pc, %0", operands);
17802 return "";
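/* Example sequence (illustrative operand choice): a call through r3 on
   an ARMv4T target is emitted as

       mov     lr, pc
       bx      r3

   while a pre-v4t, non-interworking target uses "mov  pc, r3" for the
   final transfer; a call through lr is first redirected via ip, as
   handled above.  */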
17805 /* Output a 'call' insn whose target is a reference in memory. This is
17806 disabled for ARMv5; we prefer a blx instead, because otherwise
17807 there's a significant performance overhead. */
17808 const char *
17809 output_call_mem (rtx *operands)
17811 gcc_assert (!arm_arch5);
17812 if (TARGET_INTERWORK)
17814 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17815 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17816 output_asm_insn ("bx%?\t%|ip", operands);
17818 else if (regno_use_in (LR_REGNUM, operands[0]))
17820 /* LR is used in the memory address. We load the address in the
17821 first instruction. It's safe to use IP as the target of the
17822 load since the call will kill it anyway. */
17823 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17824 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17825 if (arm_arch4t)
17826 output_asm_insn ("bx%?\t%|ip", operands);
17827 else
17828 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17830 else
17832 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17833 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17836 return "";
17840 /* Output a move of a long double from ARM registers to ARM registers.
17841 OPERANDS[0] is the destination.
17842 OPERANDS[1] is the source. */
17843 const char *
17844 output_mov_long_double_arm_from_arm (rtx *operands)
17846 /* We have to be careful here because the two might overlap. */
17847 int dest_start = REGNO (operands[0]);
17848 int src_start = REGNO (operands[1]);
17849 rtx ops[2];
17850 int i;
17852 if (dest_start < src_start)
17854 for (i = 0; i < 3; i++)
17856 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17857 ops[1] = gen_rtx_REG (SImode, src_start + i);
17858 output_asm_insn ("mov%?\t%0, %1", ops);
17861 else
17863 for (i = 2; i >= 0; i--)
17865 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17866 ops[1] = gen_rtx_REG (SImode, src_start + i);
17867 output_asm_insn ("mov%?\t%0, %1", ops);
17871 return "";
17874 void
17875 arm_emit_movpair (rtx dest, rtx src)
17877 /* If the src is an immediate, simplify it. */
17878 if (CONST_INT_P (src))
17880 HOST_WIDE_INT val = INTVAL (src);
17881 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17882 if ((val >> 16) & 0x0000ffff)
17883 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17884 GEN_INT (16)),
17885 GEN_INT ((val >> 16) & 0x0000ffff));
17886 return;
17888 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17889 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
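/* Illustrative expansion (assumed immediate value): for SRC == 0x12345678
   the two emit_set_insn calls above correspond to the instruction pair

       movw    rD, #0x5678
       movt    rD, #0x1234

   while SRC == 0x0000002a needs only the first insn, because the
   upper half-word test fails.  Symbolic operands go through the
   HIGH/LO_SUM path instead.  */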
17892 /* Output a move between double words. It must be REG<-MEM
17893 or MEM<-REG. */
17894 const char *
17895 output_move_double (rtx *operands, bool emit, int *count)
17897 enum rtx_code code0 = GET_CODE (operands[0]);
17898 enum rtx_code code1 = GET_CODE (operands[1]);
17899 rtx otherops[3];
17900 if (count)
17901 *count = 1;
17903 /* The only case when this might happen is when
17904 you are looking at the length of a DImode instruction
17905 that has an invalid constant in it. */
17906 if (code0 == REG && code1 != MEM)
17908 gcc_assert (!emit);
17909 *count = 2;
17910 return "";
17913 if (code0 == REG)
17915 unsigned int reg0 = REGNO (operands[0]);
17917 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17919 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17921 switch (GET_CODE (XEXP (operands[1], 0)))
17923 case REG:
17925 if (emit)
17927 if (TARGET_LDRD
17928 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17929 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17930 else
17931 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17933 break;
17935 case PRE_INC:
17936 gcc_assert (TARGET_LDRD);
17937 if (emit)
17938 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17939 break;
17941 case PRE_DEC:
17942 if (emit)
17944 if (TARGET_LDRD)
17945 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17946 else
17947 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17949 break;
17951 case POST_INC:
17952 if (emit)
17954 if (TARGET_LDRD)
17955 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17956 else
17957 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17959 break;
17961 case POST_DEC:
17962 gcc_assert (TARGET_LDRD);
17963 if (emit)
17964 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17965 break;
17967 case PRE_MODIFY:
17968 case POST_MODIFY:
17969 /* Autoincrement addressing modes should never have overlapping
17970 base and destination registers, and overlapping index registers
17971 are already prohibited, so this doesn't need to worry about
17972 fix_cm3_ldrd. */
17973 otherops[0] = operands[0];
17974 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17975 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17977 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17979 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17981 /* Registers overlap so split out the increment. */
17982 if (emit)
17984 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17985 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17987 if (count)
17988 *count = 2;
17990 else
17992 /* Use a single insn if we can.
17993 FIXME: IWMMXT allows offsets larger than ldrd can
17994 handle, fix these up with a pair of ldr. */
17995 if (TARGET_THUMB2
17996 || !CONST_INT_P (otherops[2])
17997 || (INTVAL (otherops[2]) > -256
17998 && INTVAL (otherops[2]) < 256))
18000 if (emit)
18001 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18003 else
18005 if (emit)
18007 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18008 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18010 if (count)
18011 *count = 2;
18016 else
18018 /* Use a single insn if we can.
18019 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18020 fix these up with a pair of ldr. */
18021 if (TARGET_THUMB2
18022 || !CONST_INT_P (otherops[2])
18023 || (INTVAL (otherops[2]) > -256
18024 && INTVAL (otherops[2]) < 256))
18026 if (emit)
18027 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18029 else
18031 if (emit)
18033 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18034 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18036 if (count)
18037 *count = 2;
18040 break;
18042 case LABEL_REF:
18043 case CONST:
18044 /* We might be able to use ldrd %0, %1 here. However the range is
18045 different to ldr/adr, and it is broken on some ARMv7-M
18046 implementations. */
18047 /* Use the second register of the pair to avoid problematic
18048 overlap. */
18049 otherops[1] = operands[1];
18050 if (emit)
18051 output_asm_insn ("adr%?\t%0, %1", otherops);
18052 operands[1] = otherops[0];
18053 if (emit)
18055 if (TARGET_LDRD)
18056 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18057 else
18058 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18061 if (count)
18062 *count = 2;
18063 break;
18065 /* ??? This needs checking for thumb2. */
18066 default:
18067 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18068 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18070 otherops[0] = operands[0];
18071 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18072 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18074 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18076 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18078 switch ((int) INTVAL (otherops[2]))
18080 case -8:
18081 if (emit)
18082 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18083 return "";
18084 case -4:
18085 if (TARGET_THUMB2)
18086 break;
18087 if (emit)
18088 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18089 return "";
18090 case 4:
18091 if (TARGET_THUMB2)
18092 break;
18093 if (emit)
18094 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18095 return "";
18098 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18099 operands[1] = otherops[0];
18100 if (TARGET_LDRD
18101 && (REG_P (otherops[2])
18102 || TARGET_THUMB2
18103 || (CONST_INT_P (otherops[2])
18104 && INTVAL (otherops[2]) > -256
18105 && INTVAL (otherops[2]) < 256)))
18107 if (reg_overlap_mentioned_p (operands[0],
18108 otherops[2]))
18110 /* Swap base and index registers over to
18111 avoid a conflict. */
18112 std::swap (otherops[1], otherops[2]);
18114 /* If both registers conflict, it will usually
18115 have been fixed by a splitter. */
18116 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18117 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18119 if (emit)
18121 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18122 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18124 if (count)
18125 *count = 2;
18127 else
18129 otherops[0] = operands[0];
18130 if (emit)
18131 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18133 return "";
18136 if (CONST_INT_P (otherops[2]))
18138 if (emit)
18140 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18141 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18142 else
18143 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18146 else
18148 if (emit)
18149 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18152 else
18154 if (emit)
18155 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18158 if (count)
18159 *count = 2;
18161 if (TARGET_LDRD)
18162 return "ldr%(d%)\t%0, [%1]";
18164 return "ldm%(ia%)\t%1, %M0";
18166 else
18168 otherops[1] = adjust_address (operands[1], SImode, 4);
18169 /* Take care of overlapping base/data reg. */
18170 if (reg_mentioned_p (operands[0], operands[1]))
18172 if (emit)
18174 output_asm_insn ("ldr%?\t%0, %1", otherops);
18175 output_asm_insn ("ldr%?\t%0, %1", operands);
18177 if (count)
18178 *count = 2;
18181 else
18183 if (emit)
18185 output_asm_insn ("ldr%?\t%0, %1", operands);
18186 output_asm_insn ("ldr%?\t%0, %1", otherops);
18188 if (count)
18189 *count = 2;
18194 else
18196 /* Constraints should ensure this. */
18197 gcc_assert (code0 == MEM && code1 == REG);
18198 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18199 || (TARGET_ARM && TARGET_LDRD));
18201 switch (GET_CODE (XEXP (operands[0], 0)))
18203 case REG:
18204 if (emit)
18206 if (TARGET_LDRD)
18207 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18208 else
18209 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18211 break;
18213 case PRE_INC:
18214 gcc_assert (TARGET_LDRD);
18215 if (emit)
18216 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18217 break;
18219 case PRE_DEC:
18220 if (emit)
18222 if (TARGET_LDRD)
18223 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18224 else
18225 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18227 break;
18229 case POST_INC:
18230 if (emit)
18232 if (TARGET_LDRD)
18233 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18234 else
18235 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18237 break;
18239 case POST_DEC:
18240 gcc_assert (TARGET_LDRD);
18241 if (emit)
18242 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18243 break;
18245 case PRE_MODIFY:
18246 case POST_MODIFY:
18247 otherops[0] = operands[1];
18248 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18249 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18251 /* IWMMXT allows offsets larger than strd can handle;
18252 fix these up with a pair of str. */
18253 if (!TARGET_THUMB2
18254 && CONST_INT_P (otherops[2])
18255 && (INTVAL(otherops[2]) <= -256
18256 || INTVAL(otherops[2]) >= 256))
18258 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18260 if (emit)
18262 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18263 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18265 if (count)
18266 *count = 2;
18268 else
18270 if (emit)
18272 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18273 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18275 if (count)
18276 *count = 2;
18279 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18281 if (emit)
18282 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18284 else
18286 if (emit)
18287 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18289 break;
18291 case PLUS:
18292 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18293 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18295 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18297 case -8:
18298 if (emit)
18299 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18300 return "";
18302 case -4:
18303 if (TARGET_THUMB2)
18304 break;
18305 if (emit)
18306 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18307 return "";
18309 case 4:
18310 if (TARGET_THUMB2)
18311 break;
18312 if (emit)
18313 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18314 return "";
18317 if (TARGET_LDRD
18318 && (REG_P (otherops[2])
18319 || TARGET_THUMB2
18320 || (CONST_INT_P (otherops[2])
18321 && INTVAL (otherops[2]) > -256
18322 && INTVAL (otherops[2]) < 256)))
18324 otherops[0] = operands[1];
18325 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18326 if (emit)
18327 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18328 return "";
18330 /* Fall through */
18332 default:
18333 otherops[0] = adjust_address (operands[0], SImode, 4);
18334 otherops[1] = operands[1];
18335 if (emit)
18337 output_asm_insn ("str%?\t%1, %0", operands);
18338 output_asm_insn ("str%?\t%H1, %0", otherops);
18340 if (count)
18341 *count = 2;
18345 return "";
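/* For illustration (hypothetical operands): a DImode load of r0/r1 from
   a plain register address produces

       ldrd    r0, [r2]            @ when TARGET_LDRD
       ldmia   r2, {r0, r1}        @ otherwise

   and the pre/post-modify and PLUS cases above fall back to a pair of
   plain ldr/str instructions (with *count set to 2) whenever the
   offset is outside the ldrd/strd range.  */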
18348 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18349 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18351 const char *
18352 output_move_quad (rtx *operands)
18354 if (REG_P (operands[0]))
18356 /* Load, or reg->reg move. */
18358 if (MEM_P (operands[1]))
18360 switch (GET_CODE (XEXP (operands[1], 0)))
18362 case REG:
18363 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18364 break;
18366 case LABEL_REF:
18367 case CONST:
18368 output_asm_insn ("adr%?\t%0, %1", operands);
18369 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18370 break;
18372 default:
18373 gcc_unreachable ();
18376 else
18378 rtx ops[2];
18379 int dest, src, i;
18381 gcc_assert (REG_P (operands[1]));
18383 dest = REGNO (operands[0]);
18384 src = REGNO (operands[1]);
18386 /* This seems pretty dumb, but hopefully GCC won't try to do it
18387 very often. */
18388 if (dest < src)
18389 for (i = 0; i < 4; i++)
18391 ops[0] = gen_rtx_REG (SImode, dest + i);
18392 ops[1] = gen_rtx_REG (SImode, src + i);
18393 output_asm_insn ("mov%?\t%0, %1", ops);
18395 else
18396 for (i = 3; i >= 0; i--)
18398 ops[0] = gen_rtx_REG (SImode, dest + i);
18399 ops[1] = gen_rtx_REG (SImode, src + i);
18400 output_asm_insn ("mov%?\t%0, %1", ops);
18404 else
18406 gcc_assert (MEM_P (operands[0]));
18407 gcc_assert (REG_P (operands[1]));
18408 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18410 switch (GET_CODE (XEXP (operands[0], 0)))
18412 case REG:
18413 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18414 break;
18416 default:
18417 gcc_unreachable ();
18421 return "";
18424 /* Output a VFP load or store instruction. */
18426 const char *
18427 output_move_vfp (rtx *operands)
18429 rtx reg, mem, addr, ops[2];
18430 int load = REG_P (operands[0]);
18431 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18432 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18433 const char *templ;
18434 char buff[50];
18435 machine_mode mode;
18437 reg = operands[!load];
18438 mem = operands[load];
18440 mode = GET_MODE (reg);
18442 gcc_assert (REG_P (reg));
18443 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18444 gcc_assert (mode == SFmode
18445 || mode == DFmode
18446 || mode == SImode
18447 || mode == DImode
18448 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18449 gcc_assert (MEM_P (mem));
18451 addr = XEXP (mem, 0);
18453 switch (GET_CODE (addr))
18455 case PRE_DEC:
18456 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18457 ops[0] = XEXP (addr, 0);
18458 ops[1] = reg;
18459 break;
18461 case POST_INC:
18462 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18463 ops[0] = XEXP (addr, 0);
18464 ops[1] = reg;
18465 break;
18467 default:
18468 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18469 ops[0] = reg;
18470 ops[1] = mem;
18471 break;
18474 sprintf (buff, templ,
18475 load ? "ld" : "st",
18476 dp ? "64" : "32",
18477 dp ? "P" : "",
18478 integer_p ? "\t%@ int" : "");
18479 output_asm_insn (buff, ops);
18481 return "";
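/* Example expansions of the template above (operands assumed): a DFmode
   load from [r3] prints as

       vldr.64 d8, [r3]

   a POST_INC SFmode store as "vstmia.32  r2!, {s0}", and an SImode
   access gets the trailing "@ int" comment appended.  */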
18484 /* Output a Neon double-word or quad-word load or store, or a load
18485 or store for larger structure modes.
18487 WARNING: The ordering of elements is weird in big-endian mode,
18488 because the EABI requires that vectors stored in memory appear
18489 as though they were stored by a VSTM instruction.
18490 GCC RTL defines element ordering based on in-memory order.
18491 This can be different from the architectural ordering of elements
18492 within a NEON register. The intrinsics defined in arm_neon.h use the
18493 NEON register element ordering, not the GCC RTL element ordering.
18495 For example, the in-memory ordering of a big-endian quadword
18496 vector with 16-bit elements when stored from register pair {d0,d1}
18497 will be (lowest address first, d0[N] is NEON register element N):
18499 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18501 When necessary, quadword registers (dN, dN+1) are moved to ARM
18502 registers from rN in the order:
18504 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18506 So that STM/LDM can be used on vectors in ARM registers, and the
18507 same memory layout will result as if VSTM/VLDM were used.
18509 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18510 possible, which allows use of appropriate alignment tags.
18511 Note that the choice of "64" is independent of the actual vector
18512 element size; this size simply ensures that the behavior is
18513 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18515 Due to limitations of those instructions, use of VST1.64/VLD1.64
18516 is not possible if:
18517 - the address contains PRE_DEC, or
18518 - the mode refers to more than 4 double-word registers
18520 In those cases, it would be possible to replace VSTM/VLDM by a
18521 sequence of instructions; this is not currently implemented since
18522 this is not certain to actually improve performance. */
18524 const char *
18525 output_move_neon (rtx *operands)
18527 rtx reg, mem, addr, ops[2];
18528 int regno, nregs, load = REG_P (operands[0]);
18529 const char *templ;
18530 char buff[50];
18531 machine_mode mode;
18533 reg = operands[!load];
18534 mem = operands[load];
18536 mode = GET_MODE (reg);
18538 gcc_assert (REG_P (reg));
18539 regno = REGNO (reg);
18540 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18541 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18542 || NEON_REGNO_OK_FOR_QUAD (regno));
18543 gcc_assert (VALID_NEON_DREG_MODE (mode)
18544 || VALID_NEON_QREG_MODE (mode)
18545 || VALID_NEON_STRUCT_MODE (mode));
18546 gcc_assert (MEM_P (mem));
18548 addr = XEXP (mem, 0);
18550 /* Strip off const from addresses like (const (plus (...))). */
18551 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18552 addr = XEXP (addr, 0);
18554 switch (GET_CODE (addr))
18556 case POST_INC:
18557 /* We have to use vldm / vstm for too-large modes. */
18558 if (nregs > 4)
18560 templ = "v%smia%%?\t%%0!, %%h1";
18561 ops[0] = XEXP (addr, 0);
18563 else
18565 templ = "v%s1.64\t%%h1, %%A0";
18566 ops[0] = mem;
18568 ops[1] = reg;
18569 break;
18571 case PRE_DEC:
18572 /* We have to use vldm / vstm in this case, since there is no
18573 pre-decrement form of the vld1 / vst1 instructions. */
18574 templ = "v%smdb%%?\t%%0!, %%h1";
18575 ops[0] = XEXP (addr, 0);
18576 ops[1] = reg;
18577 break;
18579 case POST_MODIFY:
18580 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18581 gcc_unreachable ();
18583 case REG:
18584 /* We have to use vldm / vstm for too-large modes. */
18585 if (nregs > 1)
18587 if (nregs > 4)
18588 templ = "v%smia%%?\t%%m0, %%h1";
18589 else
18590 templ = "v%s1.64\t%%h1, %%A0";
18592 ops[0] = mem;
18593 ops[1] = reg;
18594 break;
18596 /* Fall through. */
18597 case LABEL_REF:
18598 case PLUS:
18600 int i;
18601 int overlap = -1;
18602 for (i = 0; i < nregs; i++)
18604 /* We're only using DImode here because it's a convenient size. */
18605 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18606 ops[1] = adjust_address (mem, DImode, 8 * i);
18607 if (reg_overlap_mentioned_p (ops[0], mem))
18609 gcc_assert (overlap == -1);
18610 overlap = i;
18612 else
18614 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18615 output_asm_insn (buff, ops);
18618 if (overlap != -1)
18620 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18621 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18622 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18623 output_asm_insn (buff, ops);
18626 return "";
18629 default:
18630 gcc_unreachable ();
18633 sprintf (buff, templ, load ? "ld" : "st");
18634 output_asm_insn (buff, ops);
18636 return "";
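/* Illustrative expansions (assumed operands and formatting): a quadword
   value (two D registers) with a plain register address uses roughly

       vld1.64 {d0-d1}, [r2]

   possibly with an alignment hint on the address; an XImode access
   (eight D registers) falls back to "vldmia  r2, {d0-d7}", and a PLUS
   or LABEL_REF address is decomposed into one vldr/vstr per doubleword
   by the loop above.  */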
18639 /* Compute and return the length of neon_mov<mode>, where <mode> is
18640 one of VSTRUCT modes: EI, OI, CI or XI. */
18642 arm_attr_length_move_neon (rtx_insn *insn)
18644 rtx reg, mem, addr;
18645 int load;
18646 machine_mode mode;
18648 extract_insn_cached (insn);
18650 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18652 mode = GET_MODE (recog_data.operand[0]);
18653 switch (mode)
18655 case EImode:
18656 case OImode:
18657 return 8;
18658 case CImode:
18659 return 12;
18660 case XImode:
18661 return 16;
18662 default:
18663 gcc_unreachable ();
18667 load = REG_P (recog_data.operand[0]);
18668 reg = recog_data.operand[!load];
18669 mem = recog_data.operand[load];
18671 gcc_assert (MEM_P (mem));
18673 mode = GET_MODE (reg);
18674 addr = XEXP (mem, 0);
18676 /* Strip off const from addresses like (const (plus (...))). */
18677 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18678 addr = XEXP (addr, 0);
18680 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18682 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18683 return insns * 4;
18685 else
18686 return 4;
18689 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18690 return zero. */
18693 arm_address_offset_is_imm (rtx_insn *insn)
18695 rtx mem, addr;
18697 extract_insn_cached (insn);
18699 if (REG_P (recog_data.operand[0]))
18700 return 0;
18702 mem = recog_data.operand[0];
18704 gcc_assert (MEM_P (mem));
18706 addr = XEXP (mem, 0);
18708 if (REG_P (addr)
18709 || (GET_CODE (addr) == PLUS
18710 && REG_P (XEXP (addr, 0))
18711 && CONST_INT_P (XEXP (addr, 1))))
18712 return 1;
18713 else
18714 return 0;
18717 /* Output an ADD r, s, #n where n may be too big for one instruction.
18718 If adding zero to a register that is also the destination, output nothing.
18719 const char *
18720 output_add_immediate (rtx *operands)
18722 HOST_WIDE_INT n = INTVAL (operands[2]);
18724 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18726 if (n < 0)
18727 output_multi_immediate (operands,
18728 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18729 -n);
18730 else
18731 output_multi_immediate (operands,
18732 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18736 return "";
18739 /* Output a multiple immediate operation.
18740 OPERANDS is the vector of operands referred to in the output patterns.
18741 INSTR1 is the output pattern to use for the first constant.
18742 INSTR2 is the output pattern to use for subsequent constants.
18743 IMMED_OP is the index of the constant slot in OPERANDS.
18744 N is the constant value. */
18745 static const char *
18746 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18747 int immed_op, HOST_WIDE_INT n)
18749 #if HOST_BITS_PER_WIDE_INT > 32
18750 n &= 0xffffffff;
18751 #endif
18753 if (n == 0)
18755 /* Quick and easy output. */
18756 operands[immed_op] = const0_rtx;
18757 output_asm_insn (instr1, operands);
18759 else
18761 int i;
18762 const char * instr = instr1;
18764 /* Note that n is never zero here (which would give no output). */
18765 for (i = 0; i < 32; i += 2)
18767 if (n & (3 << i))
18769 operands[immed_op] = GEN_INT (n & (255 << i));
18770 output_asm_insn (instr, operands);
18771 instr = instr2;
18772 i += 6;
18777 return "";
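/* Worked example (values assumed): adding #0x1234, which is not a valid
   ARM immediate, goes through output_multi_immediate and is split into
   two instructions, each using one 8-bit-rotatable chunk:

       add     r0, r1, #564        @ 0x234
       add     r0, r0, #4096       @ 0x1000

   The loop above scans the constant two bits at a time and consumes up
   to eight bits per emitted instruction.  */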
18780 /* Return the name of a shifter operation. */
18781 static const char *
18782 arm_shift_nmem(enum rtx_code code)
18784 switch (code)
18786 case ASHIFT:
18787 return ARM_LSL_NAME;
18789 case ASHIFTRT:
18790 return "asr";
18792 case LSHIFTRT:
18793 return "lsr";
18795 case ROTATERT:
18796 return "ror";
18798 default:
18799 abort();
18803 /* Return the appropriate ARM instruction for the operation code.
18804 The returned result should not be overwritten. OP is the rtx of the
18805 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18806 was shifted. */
18807 const char *
18808 arithmetic_instr (rtx op, int shift_first_arg)
18810 switch (GET_CODE (op))
18812 case PLUS:
18813 return "add";
18815 case MINUS:
18816 return shift_first_arg ? "rsb" : "sub";
18818 case IOR:
18819 return "orr";
18821 case XOR:
18822 return "eor";
18824 case AND:
18825 return "and";
18827 case ASHIFT:
18828 case ASHIFTRT:
18829 case LSHIFTRT:
18830 case ROTATERT:
18831 return arm_shift_nmem(GET_CODE(op));
18833 default:
18834 gcc_unreachable ();
18838 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18839 for the operation code. The returned result should not be overwritten.
18840 OP is the rtx code of the shift.
18841 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise the
18842 constant shift amount. */
18843 static const char *
18844 shift_op (rtx op, HOST_WIDE_INT *amountp)
18846 const char * mnem;
18847 enum rtx_code code = GET_CODE (op);
18849 switch (code)
18851 case ROTATE:
18852 if (!CONST_INT_P (XEXP (op, 1)))
18854 output_operand_lossage ("invalid shift operand");
18855 return NULL;
18858 code = ROTATERT;
18859 *amountp = 32 - INTVAL (XEXP (op, 1));
18860 mnem = "ror";
18861 break;
18863 case ASHIFT:
18864 case ASHIFTRT:
18865 case LSHIFTRT:
18866 case ROTATERT:
18867 mnem = arm_shift_nmem(code);
18868 if (CONST_INT_P (XEXP (op, 1)))
18870 *amountp = INTVAL (XEXP (op, 1));
18872 else if (REG_P (XEXP (op, 1)))
18874 *amountp = -1;
18875 return mnem;
18877 else
18879 output_operand_lossage ("invalid shift operand");
18880 return NULL;
18882 break;
18884 case MULT:
18885 /* We never have to worry about the amount being other than a
18886 power of 2, since this case can never be reloaded from a reg. */
18887 if (!CONST_INT_P (XEXP (op, 1)))
18889 output_operand_lossage ("invalid shift operand");
18890 return NULL;
18893 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18895 /* Amount must be a power of two. */
18896 if (*amountp & (*amountp - 1))
18898 output_operand_lossage ("invalid shift operand");
18899 return NULL;
18902 *amountp = int_log2 (*amountp);
18903 return ARM_LSL_NAME;
18905 default:
18906 output_operand_lossage ("invalid shift operand");
18907 return NULL;
18910 /* This is not 100% correct, but follows from the desire to merge
18911 multiplication by a power of 2 with the recognizer for a
18912 shift. >=32 is not a valid shift for "lsl", so we must try to
18913 output a shift that produces the correct arithmetical result.
18914 Using lsr #32 is identical except for the fact that the carry bit
18915 is not set correctly if we set the flags; but we never use the
18916 carry bit from such an operation, so we can ignore that. */
18917 if (code == ROTATERT)
18918 /* Rotate is just modulo 32. */
18919 *amountp &= 31;
18920 else if (*amountp != (*amountp & 31))
18922 if (code == ASHIFT)
18923 mnem = "lsr";
18924 *amountp = 32;
18927 /* Shifts of 0 are no-ops. */
18928 if (*amountp == 0)
18929 return NULL;
18931 return mnem;
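/* Worked examples (assumed operands): (mult x 16) is reported as "lsl"
   with *amountp == 4; (rotate x 8) is canonicalised to "ror" with
   *amountp == 24; a shift by a register sets *amountp to -1; and a
   shift amount of zero makes the function return NULL so that no
   shifter operand is printed at all.  */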
18934 /* Obtain the shift from the POWER of two. */
18936 static HOST_WIDE_INT
18937 int_log2 (HOST_WIDE_INT power)
18939 HOST_WIDE_INT shift = 0;
18941 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18943 gcc_assert (shift <= 31);
18944 shift++;
18947 return shift;
18950 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18951 because /bin/as is horribly restrictive. The judgement about
18952 whether or not each character is 'printable' (and can be output as
18953 is) or not (and must be printed with an octal escape) must be made
18954 with reference to the *host* character set -- the situation is
18955 similar to that discussed in the comments above pp_c_char in
18956 c-pretty-print.c. */
18958 #define MAX_ASCII_LEN 51
18960 void
18961 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18963 int i;
18964 int len_so_far = 0;
18966 fputs ("\t.ascii\t\"", stream);
18968 for (i = 0; i < len; i++)
18970 int c = p[i];
18972 if (len_so_far >= MAX_ASCII_LEN)
18974 fputs ("\"\n\t.ascii\t\"", stream);
18975 len_so_far = 0;
18978 if (ISPRINT (c))
18980 if (c == '\\' || c == '\"')
18982 putc ('\\', stream);
18983 len_so_far++;
18985 putc (c, stream);
18986 len_so_far++;
18988 else
18990 fprintf (stream, "\\%03o", c);
18991 len_so_far += 4;
18995 fputs ("\"\n", stream);
18998 /* Compute the register save mask for registers 0 through 12
18999 inclusive. This code is used by arm_compute_save_reg_mask. */
19001 static unsigned long
19002 arm_compute_save_reg0_reg12_mask (void)
19004 unsigned long func_type = arm_current_func_type ();
19005 unsigned long save_reg_mask = 0;
19006 unsigned int reg;
19008 if (IS_INTERRUPT (func_type))
19010 unsigned int max_reg;
19011 /* Interrupt functions must not corrupt any registers,
19012 even call clobbered ones. If this is a leaf function
19013 we can just examine the registers used by the RTL, but
19014 otherwise we have to assume that whatever function is
19015 called might clobber anything, and so we have to save
19016 all the call-clobbered registers as well. */
19017 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19018 /* FIQ handlers have registers r8 - r12 banked, so
19019 we only need to check r0 - r7. Normal ISRs only
19020 bank r14 and r15, so we must check up to r12.
19021 r13 is the stack pointer, which is always preserved,
19022 so we do not need to consider it here. */
19023 max_reg = 7;
19024 else
19025 max_reg = 12;
19027 for (reg = 0; reg <= max_reg; reg++)
19028 if (df_regs_ever_live_p (reg)
19029 || (! crtl->is_leaf && call_used_regs[reg]))
19030 save_reg_mask |= (1 << reg);
19032 /* Also save the pic base register if necessary. */
19033 if (flag_pic
19034 && !TARGET_SINGLE_PIC_BASE
19035 && arm_pic_register != INVALID_REGNUM
19036 && crtl->uses_pic_offset_table)
19037 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19039 else if (IS_VOLATILE(func_type))
19041 /* For noreturn functions we historically omitted register saves
19042 altogether. However, this really messes up debugging. As a
19043 compromise, save just the frame pointers. Combined with the link
19044 register saved elsewhere this should be sufficient to get
19045 a backtrace. */
19046 if (frame_pointer_needed)
19047 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19048 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19049 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19050 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19051 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19053 else
19055 /* In the normal case we only need to save those registers
19056 which are call saved and which are used by this function. */
19057 for (reg = 0; reg <= 11; reg++)
19058 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19059 save_reg_mask |= (1 << reg);
19061 /* Handle the frame pointer as a special case. */
19062 if (frame_pointer_needed)
19063 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19065 /* If we aren't loading the PIC register,
19066 don't stack it even though it may be live. */
19067 if (flag_pic
19068 && !TARGET_SINGLE_PIC_BASE
19069 && arm_pic_register != INVALID_REGNUM
19070 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19071 || crtl->uses_pic_offset_table))
19072 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19074 /* The prologue will copy SP into R0, so save it. */
19075 if (IS_STACKALIGN (func_type))
19076 save_reg_mask |= 1;
19079 /* Save registers so the exception handler can modify them. */
19080 if (crtl->calls_eh_return)
19082 unsigned int i;
19084 for (i = 0; ; i++)
19086 reg = EH_RETURN_DATA_REGNO (i);
19087 if (reg == INVALID_REGNUM)
19088 break;
19089 save_reg_mask |= 1 << reg;
19093 return save_reg_mask;
19096 /* Return true if r3 is live at the start of the function. */
19098 static bool
19099 arm_r3_live_at_start_p (void)
19101 /* Just look at cfg info, which is still close enough to correct at this
19102 point. This gives false positives for broken functions that might use
19103 uninitialized data that happens to be allocated in r3, but who cares? */
19104 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19107 /* Compute the number of bytes used to store the static chain register on the
19108 stack, above the stack frame. We need to know this accurately to get the
19109 alignment of the rest of the stack frame correct. */
19111 static int
19112 arm_compute_static_chain_stack_bytes (void)
19114 /* See the defining assertion in arm_expand_prologue. */
19115 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19116 && IS_NESTED (arm_current_func_type ())
19117 && arm_r3_live_at_start_p ()
19118 && crtl->args.pretend_args_size == 0)
19119 return 4;
19121 return 0;
19124 /* Compute a bit mask of which registers need to be
19125 saved on the stack for the current function.
19126 This is used by arm_get_frame_offsets, which may add extra registers. */
19128 static unsigned long
19129 arm_compute_save_reg_mask (void)
19131 unsigned int save_reg_mask = 0;
19132 unsigned long func_type = arm_current_func_type ();
19133 unsigned int reg;
19135 if (IS_NAKED (func_type))
19136 /* This should never really happen. */
19137 return 0;
19139 /* If we are creating a stack frame, then we must save the frame pointer,
19140 IP (which will hold the old stack pointer), LR and the PC. */
19141 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19142 save_reg_mask |=
19143 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19144 | (1 << IP_REGNUM)
19145 | (1 << LR_REGNUM)
19146 | (1 << PC_REGNUM);
19148 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19150 /* Decide if we need to save the link register.
19151 Interrupt routines have their own banked link register,
19152 so they never need to save it.
19153 Otherwise if we do not use the link register we do not need to save
19154 it. If we are pushing other registers onto the stack however, we
19155 can save an instruction in the epilogue by pushing the link register
19156 now and then popping it back into the PC. This incurs extra memory
19157 accesses though, so we only do it when optimizing for size, and only
19158 if we know that we will not need a fancy return sequence. */
19159 if (df_regs_ever_live_p (LR_REGNUM)
19160 || (save_reg_mask
19161 && optimize_size
19162 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19163 && !crtl->calls_eh_return))
19164 save_reg_mask |= 1 << LR_REGNUM;
19166 if (cfun->machine->lr_save_eliminated)
19167 save_reg_mask &= ~ (1 << LR_REGNUM);
19169 if (TARGET_REALLY_IWMMXT
19170 && ((bit_count (save_reg_mask)
19171 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19172 arm_compute_static_chain_stack_bytes())
19173 ) % 2) != 0)
19175 /* The total number of registers that are going to be pushed
19176 onto the stack is odd. We need to ensure that the stack
19177 is 64-bit aligned before we start to save iWMMXt registers,
19178 and also before we start to create locals. (A local variable
19179 might be a double or long long which we will load/store using
19180 an iWMMXt instruction). Therefore we need to push another
19181 ARM register, so that the stack will be 64-bit aligned. We
19183 try to avoid using the arg registers (r0 - r3) as they might be
19183 used to pass values in a tail call. */
19184 for (reg = 4; reg <= 12; reg++)
19185 if ((save_reg_mask & (1 << reg)) == 0)
19186 break;
19188 if (reg <= 12)
19189 save_reg_mask |= (1 << reg);
19190 else
19192 cfun->machine->sibcall_blocked = 1;
19193 save_reg_mask |= (1 << 3);
19197 /* We may need to push an additional register for use initializing the
19198 PIC base register. */
19199 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19200 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19202 reg = thumb_find_work_register (1 << 4);
19203 if (!call_used_regs[reg])
19204 save_reg_mask |= (1 << reg);
19207 return save_reg_mask;
19211 /* Compute a bit mask of which registers need to be
19212 saved on the stack for the current function. */
19213 static unsigned long
19214 thumb1_compute_save_reg_mask (void)
19216 unsigned long mask;
19217 unsigned reg;
19219 mask = 0;
19220 for (reg = 0; reg < 12; reg ++)
19221 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19222 mask |= 1 << reg;
19224 if (flag_pic
19225 && !TARGET_SINGLE_PIC_BASE
19226 && arm_pic_register != INVALID_REGNUM
19227 && crtl->uses_pic_offset_table)
19228 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19230 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19231 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19232 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19234 /* LR will also be pushed if any lo regs are pushed. */
19235 if (mask & 0xff || thumb_force_lr_save ())
19236 mask |= (1 << LR_REGNUM);
19238 /* Make sure we have a low work register if we need one.
19239 We will need one if we are going to push a high register,
19240 but we are not currently intending to push a low register. */
19241 if ((mask & 0xff) == 0
19242 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19244 /* Use thumb_find_work_register to choose which register
19245 we will use. If the register is live then we will
19246 have to push it. Use LAST_LO_REGNUM as our fallback
19247 choice for the register to select. */
19248 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19249 /* Make sure the register returned by thumb_find_work_register is
19250 not part of the return value. */
19251 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19252 reg = LAST_LO_REGNUM;
19254 if (! call_used_regs[reg])
19255 mask |= 1 << reg;
19258 /* The 504 below is 8 bytes less than 512 because there are two possible
19259 alignment words. We can't tell here if they will be present or not so we
19260 have to play it safe and assume that they are. */
19261 if ((CALLER_INTERWORKING_SLOT_SIZE +
19262 ROUND_UP_WORD (get_frame_size ()) +
19263 crtl->outgoing_args_size) >= 504)
19265 /* This is the same as the code in thumb1_expand_prologue() which
19266 determines which register to use for stack decrement. */
19267 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19268 if (mask & (1 << reg))
19269 break;
19271 if (reg > LAST_LO_REGNUM)
19273 /* Make sure we have a register available for stack decrement. */
19274 mask |= 1 << LAST_LO_REGNUM;
19278 return mask;
19282 /* Return the number of bytes required to save VFP registers. */
19283 static int
19284 arm_get_vfp_saved_size (void)
19286 unsigned int regno;
19287 int count;
19288 int saved;
19290 saved = 0;
19291 /* Space for saved VFP registers. */
19292 if (TARGET_HARD_FLOAT && TARGET_VFP)
19294 count = 0;
19295 for (regno = FIRST_VFP_REGNUM;
19296 regno < LAST_VFP_REGNUM;
19297 regno += 2)
19299 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19300 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19302 if (count > 0)
19304 /* Work around the ARM10 VFPr1 bug. */
19305 if (count == 2 && !arm_arch6)
19306 count++;
19307 saved += count * 8;
19309 count = 0;
19311 else
19312 count++;
19314 if (count > 0)
19316 if (count == 2 && !arm_arch6)
19317 count++;
19318 saved += count * 8;
19321 return saved;
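/* Worked example (register usage assumed): if d8..d10 are the only
   call-saved VFP registers live in the function, the single run of
   three registers costs 3 * 8 == 24 bytes.  On a pre-v6 core a run of
   exactly two registers is padded to three by the VFPr1 workaround, so
   d8/d9 alone would also account for 24 bytes there, but only 16 on v6
   and later.  */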
19325 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19326 everything bar the final return instruction. If simple_return is true,
19327 then do not output the epilogue, because it has already been emitted in RTL. */
19328 const char *
19329 output_return_instruction (rtx operand, bool really_return, bool reverse,
19330 bool simple_return)
19332 char conditional[10];
19333 char instr[100];
19334 unsigned reg;
19335 unsigned long live_regs_mask;
19336 unsigned long func_type;
19337 arm_stack_offsets *offsets;
19339 func_type = arm_current_func_type ();
19341 if (IS_NAKED (func_type))
19342 return "";
19344 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19346 /* If this function was declared non-returning, and we have
19347 found a tail call, then we have to trust that the called
19348 function won't return. */
19349 if (really_return)
19351 rtx ops[2];
19353 /* Otherwise, trap an attempted return by aborting. */
19354 ops[0] = operand;
19355 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19356 : "abort");
19357 assemble_external_libcall (ops[1]);
19358 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19361 return "";
19364 gcc_assert (!cfun->calls_alloca || really_return);
19366 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19368 cfun->machine->return_used_this_function = 1;
19370 offsets = arm_get_frame_offsets ();
19371 live_regs_mask = offsets->saved_regs_mask;
19373 if (!simple_return && live_regs_mask)
19375 const char * return_reg;
19377 /* If we do not have any special requirements for function exit
19378 (e.g. interworking) then we can load the return address
19379 directly into the PC. Otherwise we must load it into LR. */
19380 if (really_return
19381 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19382 return_reg = reg_names[PC_REGNUM];
19383 else
19384 return_reg = reg_names[LR_REGNUM];
19386 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19388 /* There are three possible reasons for the IP register
19389 being saved: 1) a stack frame was created, in which case
19390 IP contains the old stack pointer; 2) an ISR routine
19391 corrupted it; or 3) it was saved to align the stack on
19392 iWMMXt. In case 1, restore IP into SP; otherwise just
19393 restore IP. */
19394 if (frame_pointer_needed)
19396 live_regs_mask &= ~ (1 << IP_REGNUM);
19397 live_regs_mask |= (1 << SP_REGNUM);
19399 else
19400 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19403 /* On some ARM architectures it is faster to use LDR rather than
19404 LDM to load a single register. On other architectures, the
19405 cost is the same. In 26 bit mode, or for exception handlers,
19406 we have to use LDM to load the PC so that the CPSR is also
19407 restored. */
19408 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19409 if (live_regs_mask == (1U << reg))
19410 break;
19412 if (reg <= LAST_ARM_REGNUM
19413 && (reg != LR_REGNUM
19414 || ! really_return
19415 || ! IS_INTERRUPT (func_type)))
19417 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19418 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19420 else
19422 char *p;
19423 int first = 1;
19425 /* Generate the load multiple instruction to restore the
19426 registers. Note we can get here, even if
19427 frame_pointer_needed is true, but only if sp already
19428 points to the base of the saved core registers. */
19429 if (live_regs_mask & (1 << SP_REGNUM))
19431 unsigned HOST_WIDE_INT stack_adjust;
19433 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19434 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19436 if (stack_adjust && arm_arch5 && TARGET_ARM)
19437 if (TARGET_UNIFIED_ASM)
19438 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19439 else
19440 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19441 else
19443 /* If we can't use ldmib (SA110 bug),
19444 then try to pop r3 instead. */
19445 if (stack_adjust)
19446 live_regs_mask |= 1 << 3;
19448 if (TARGET_UNIFIED_ASM)
19449 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19450 else
19451 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19454 else
19455 if (TARGET_UNIFIED_ASM)
19456 sprintf (instr, "pop%s\t{", conditional);
19457 else
19458 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19460 p = instr + strlen (instr);
19462 for (reg = 0; reg <= SP_REGNUM; reg++)
19463 if (live_regs_mask & (1 << reg))
19465 int l = strlen (reg_names[reg]);
19467 if (first)
19468 first = 0;
19469 else
19471 memcpy (p, ", ", 2);
19472 p += 2;
19475 memcpy (p, "%|", 2);
19476 memcpy (p + 2, reg_names[reg], l);
19477 p += l + 2;
19480 if (live_regs_mask & (1 << LR_REGNUM))
19482 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19483 /* If returning from an interrupt, restore the CPSR. */
19484 if (IS_INTERRUPT (func_type))
19485 strcat (p, "^");
19487 else
19488 strcpy (p, "}");
19491 output_asm_insn (instr, & operand);
19493 /* See if we need to generate an extra instruction to
19494 perform the actual function return. */
19495 if (really_return
19496 && func_type != ARM_FT_INTERWORKED
19497 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19499 /* The return has already been handled
19500 by loading the LR into the PC. */
19501 return "";
19505 if (really_return)
19507 switch ((int) ARM_FUNC_TYPE (func_type))
19509 case ARM_FT_ISR:
19510 case ARM_FT_FIQ:
19511 /* ??? This is wrong for unified assembly syntax. */
19512 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19513 break;
19515 case ARM_FT_INTERWORKED:
19516 sprintf (instr, "bx%s\t%%|lr", conditional);
19517 break;
19519 case ARM_FT_EXCEPTION:
19520 /* ??? This is wrong for unified assembly syntax. */
19521 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19522 break;
19524 default:
19525 /* Use bx if it's available. */
19526 if (arm_arch5 || arm_arch4t)
19527 sprintf (instr, "bx%s\t%%|lr", conditional);
19528 else
19529 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19530 break;
19533 output_asm_insn (instr, & operand);
19536 return "";
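/* Illustrative outputs (operand and mask values assumed): a plain leaf
   return on ARMv5 is just "bx  lr"; a function that pushed {r4, r5, lr}
   restores them with

       ldmfd   sp!, {r4, r5, pc}

   (or "pop {r4, r5, pc}" under unified syntax), and an ISR return uses
   the flag-setting "subs  pc, lr, #4" form so that the CPSR is
   restored as well.  */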
19539 /* Write the function name into the code section, directly preceding
19540 the function prologue.
19542 Code will be output similar to this:
19544 .ascii "arm_poke_function_name", 0
19545 .align
19547 .word 0xff000000 + (t1 - t0)
19548 arm_poke_function_name
19549 mov ip, sp
19550 stmfd sp!, {fp, ip, lr, pc}
19551 sub fp, ip, #4
19553 When performing a stack backtrace, code can inspect the value
19554 of 'pc' stored at 'fp' + 0. If the trace function then looks
19555 at location pc - 12 and the top 8 bits are set, then we know
19556 that a function name is embedded immediately preceding this
19557 location, and that its length is ((pc[-3]) & 0x00ffffff).
19559 We assume that pc is declared as a pointer to an unsigned long.
19561 It is of no benefit to output the function name if we are assembling
19562 a leaf function. These function types will not contain a stack
19563 backtrace structure, so it is not possible to determine the
19564 function name. */
19565 void
19566 arm_poke_function_name (FILE *stream, const char *name)
19568 unsigned long alignlength;
19569 unsigned long length;
19570 rtx x;
19572 length = strlen (name) + 1;
19573 alignlength = ROUND_UP_WORD (length);
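/* For example, a 22-character name gives length == 23 and
   alignlength == 24, so the marker word emitted below is 0xff000018. */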
19575 ASM_OUTPUT_ASCII (stream, name, length);
19576 ASM_OUTPUT_ALIGN (stream, 2);
19577 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19578 assemble_aligned_integer (UNITS_PER_WORD, x);
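/* Illustrative, standalone sketch (not part of arm.c): how a stack
   backtracer could consume the marker word described above.  PC is
   assumed to be the value fetched from 'fp' + 0 for the frame being
   inspected, viewed as a pointer to 32-bit words; the helper name is
   made up for this example.  */

#include <stdint.h>
#include <stdio.h>

static void
print_embedded_function_name (const uint32_t *pc)
{
  uint32_t marker = pc[-3];

  /* Top 8 bits all set means a name precedes the marker word.  */
  if ((marker & 0xff000000u) != 0xff000000u)
    {
      puts ("<no embedded name>");
      return;
    }

  /* The low 24 bits hold the word-aligned length of the NUL-terminated
     name, which ends immediately before the marker word.  */
  uint32_t len = marker & 0x00ffffffu;
  puts ((const char *) (pc - 3) - len);
}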
19581 /* Place some comments into the assembler stream
19582 describing the current function. */
19583 static void
19584 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19586 unsigned long func_type;
19588 /* ??? Do we want to print some of the below anyway? */
19589 if (TARGET_THUMB1)
19590 return;
19592 /* Sanity check. */
19593 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19595 func_type = arm_current_func_type ();
19597 switch ((int) ARM_FUNC_TYPE (func_type))
19599 default:
19600 case ARM_FT_NORMAL:
19601 break;
19602 case ARM_FT_INTERWORKED:
19603 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19604 break;
19605 case ARM_FT_ISR:
19606 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19607 break;
19608 case ARM_FT_FIQ:
19609 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19610 break;
19611 case ARM_FT_EXCEPTION:
19612 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19613 break;
19616 if (IS_NAKED (func_type))
19617 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19619 if (IS_VOLATILE (func_type))
19620 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19622 if (IS_NESTED (func_type))
19623 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19624 if (IS_STACKALIGN (func_type))
19625 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19627 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19628 crtl->args.size,
19629 crtl->args.pretend_args_size, frame_size);
19631 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19632 frame_pointer_needed,
19633 cfun->machine->uses_anonymous_args);
19635 if (cfun->machine->lr_save_eliminated)
19636 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19638 if (crtl->calls_eh_return)
19639 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19643 static void
19644 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19645 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19647 arm_stack_offsets *offsets;
19649 if (TARGET_THUMB1)
19651 int regno;
19653 /* Emit any call-via-reg trampolines that are needed for v4t support
19654 of call_reg and call_value_reg type insns. */
19655 for (regno = 0; regno < LR_REGNUM; regno++)
19657 rtx label = cfun->machine->call_via[regno];
19659 if (label != NULL)
19661 switch_to_section (function_section (current_function_decl));
19662 targetm.asm_out.internal_label (asm_out_file, "L",
19663 CODE_LABEL_NUMBER (label));
19664 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19668 /* ??? Probably not safe to set this here, since it assumes that a
19669 function will be emitted as assembly immediately after we generate
19670 RTL for it. This does not happen for inline functions. */
19671 cfun->machine->return_used_this_function = 0;
19673 else /* TARGET_32BIT */
19675 /* We need to take into account any stack-frame rounding. */
19676 offsets = arm_get_frame_offsets ();
19678 gcc_assert (!use_return_insn (FALSE, NULL)
19679 || (cfun->machine->return_used_this_function != 0)
19680 || offsets->saved_regs == offsets->outgoing_args
19681 || frame_pointer_needed);
19685 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19686 STR and STRD. If an even number of registers is being pushed, one
19687 STRD pattern is created for each register pair. If an
19688 odd number of registers is pushed, an initial STR is emitted followed by
19689 as many STRD instructions as are needed. This works best when the
19690 stack is initially 64-bit aligned (the normal case), since it
19691 ensures that each STRD is also 64-bit aligned. */
19692 static void
19693 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19695 int num_regs = 0;
19696 int i;
19697 int regno;
19698 rtx par = NULL_RTX;
19699 rtx dwarf = NULL_RTX;
19700 rtx tmp;
19701 bool first = true;
19703 num_regs = bit_count (saved_regs_mask);
19705 /* Must be at least one register to save, and can't save SP or PC. */
19706 gcc_assert (num_regs > 0 && num_regs <= 14);
19707 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19708 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19710 /* Create sequence for DWARF info. All the frame-related data for
19711 debugging is held in this wrapper. */
19712 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19714 /* Describe the stack adjustment. */
19715 tmp = gen_rtx_SET (VOIDmode,
19716 stack_pointer_rtx,
19717 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19718 RTX_FRAME_RELATED_P (tmp) = 1;
19719 XVECEXP (dwarf, 0, 0) = tmp;
19721 /* Find the first register. */
19722 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19723 continue;
19725 i = 0;
19727 /* If there's an odd number of registers to push, start off by
19728 pushing a single register. This ensures that subsequent strd
19729 operations are dword aligned (assuming that SP was originally
19730 64-bit aligned). */
19731 if ((num_regs & 1) != 0)
19733 rtx reg, mem, insn;
19735 reg = gen_rtx_REG (SImode, regno);
19736 if (num_regs == 1)
19737 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19738 stack_pointer_rtx));
19739 else
19740 mem = gen_frame_mem (Pmode,
19741 gen_rtx_PRE_MODIFY
19742 (Pmode, stack_pointer_rtx,
19743 plus_constant (Pmode, stack_pointer_rtx,
19744 -4 * num_regs)));
19746 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19747 RTX_FRAME_RELATED_P (tmp) = 1;
19748 insn = emit_insn (tmp);
19749 RTX_FRAME_RELATED_P (insn) = 1;
19750 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19751 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19752 reg);
19753 RTX_FRAME_RELATED_P (tmp) = 1;
19754 i++;
19755 regno++;
19756 XVECEXP (dwarf, 0, i) = tmp;
19757 first = false;
19760 while (i < num_regs)
19761 if (saved_regs_mask & (1 << regno))
19763 rtx reg1, reg2, mem1, mem2;
19764 rtx tmp0, tmp1, tmp2;
19765 int regno2;
19767 /* Find the register to pair with this one. */
19768 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19769 regno2++)
19770 continue;
19772 reg1 = gen_rtx_REG (SImode, regno);
19773 reg2 = gen_rtx_REG (SImode, regno2);
19775 if (first)
19777 rtx insn;
19779 first = false;
19780 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19781 stack_pointer_rtx,
19782 -4 * num_regs));
19783 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19784 stack_pointer_rtx,
19785 -4 * (num_regs - 1)));
19786 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19787 plus_constant (Pmode, stack_pointer_rtx,
19788 -4 * (num_regs)));
19789 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19790 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19791 RTX_FRAME_RELATED_P (tmp0) = 1;
19792 RTX_FRAME_RELATED_P (tmp1) = 1;
19793 RTX_FRAME_RELATED_P (tmp2) = 1;
19794 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19795 XVECEXP (par, 0, 0) = tmp0;
19796 XVECEXP (par, 0, 1) = tmp1;
19797 XVECEXP (par, 0, 2) = tmp2;
19798 insn = emit_insn (par);
19799 RTX_FRAME_RELATED_P (insn) = 1;
19800 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19802 else
19804 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19805 stack_pointer_rtx,
19806 4 * i));
19807 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19808 stack_pointer_rtx,
19809 4 * (i + 1)));
19810 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19811 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19812 RTX_FRAME_RELATED_P (tmp1) = 1;
19813 RTX_FRAME_RELATED_P (tmp2) = 1;
19814 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19815 XVECEXP (par, 0, 0) = tmp1;
19816 XVECEXP (par, 0, 1) = tmp2;
19817 emit_insn (par);
19820 /* Create unwind information. This is an approximation. */
19821 tmp1 = gen_rtx_SET (VOIDmode,
19822 gen_frame_mem (Pmode,
19823 plus_constant (Pmode,
19824 stack_pointer_rtx,
19825 4 * i)),
19826 reg1);
19827 tmp2 = gen_rtx_SET (VOIDmode,
19828 gen_frame_mem (Pmode,
19829 plus_constant (Pmode,
19830 stack_pointer_rtx,
19831 4 * (i + 1))),
19832 reg2);
19834 RTX_FRAME_RELATED_P (tmp1) = 1;
19835 RTX_FRAME_RELATED_P (tmp2) = 1;
19836 XVECEXP (dwarf, 0, i + 1) = tmp1;
19837 XVECEXP (dwarf, 0, i + 2) = tmp2;
19838 i += 2;
19839 regno = regno2 + 1;
19841 else
19842 regno++;
19844 return;
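/* Standalone sketch (not part of arm.c) of the store plan built by
   thumb2_emit_strd_push: with an odd register count a single STR with
   writeback goes first, so every following STRD stays doubleword
   aligned; the first access always performs the whole SP decrement.
   The mask passed in main() is an arbitrary example.  */

#include <stdio.h>

static void
print_strd_push_plan (unsigned long mask)
{
  int regs[16], n = 0;

  for (int r = 0; r < 16; r++)
    if (mask & (1UL << r))
      regs[n++] = r;

  int i = 0;
  if (n & 1)
    printf ("str   r%d, [sp, #-%d]!\n", regs[i++], 4 * n);

  while (i < n)
    {
      if (i == 0)
        printf ("strd  r%d, r%d, [sp, #-%d]!\n", regs[0], regs[1], 4 * n);
      else
        printf ("strd  r%d, r%d, [sp, #%d]\n", regs[i], regs[i + 1], 4 * i);
      i += 2;
    }
}

int
main (void)
{
  print_strd_push_plan (0x01f0);  /* r4-r8: five registers, odd count.  */
  return 0;
}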
19847 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19848 whenever possible, otherwise it emits single-word stores. The first store
19849 also allocates stack space for all saved registers, using writeback with
19850 pre-indexed addressing. All other stores use offset addressing. If no STRD
19851 can be emitted, this function emits a sequence of single-word stores,
19852 and not an STM as before, because single-word stores give the scheduler more
19853 freedom and can be turned into an STM by peephole optimizations. */
19854 static void
19855 arm_emit_strd_push (unsigned long saved_regs_mask)
19857 int num_regs = 0;
19858 int i, j, dwarf_index = 0;
19859 int offset = 0;
19860 rtx dwarf = NULL_RTX;
19861 rtx insn = NULL_RTX;
19862 rtx tmp, mem;
19864 /* TODO: More efficient code could be emitted by changing the
19865 layout, e.g., first push all pairs that can use STRD to keep the
19866 stack aligned, and then push all other registers. */
19867 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19868 if (saved_regs_mask & (1 << i))
19869 num_regs++;
19871 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19872 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19873 gcc_assert (num_regs > 0);
19875 /* Create sequence for DWARF info. */
19876 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19879 /* For the dwarf info, we generate an explicit stack update. */
19879 tmp = gen_rtx_SET (VOIDmode,
19880 stack_pointer_rtx,
19881 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19882 RTX_FRAME_RELATED_P (tmp) = 1;
19883 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19885 /* Save registers. */
19886 offset = - 4 * num_regs;
19887 j = 0;
19888 while (j <= LAST_ARM_REGNUM)
19889 if (saved_regs_mask & (1 << j))
19891 if ((j % 2 == 0)
19892 && (saved_regs_mask & (1 << (j + 1))))
19894 /* Current register and next register form a register pair for
19895 which STRD can be generated. */
19896 if (offset < 0)
19898 /* Allocate stack space for all saved registers. */
19899 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19900 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19901 mem = gen_frame_mem (DImode, tmp);
19902 offset = 0;
19904 else if (offset > 0)
19905 mem = gen_frame_mem (DImode,
19906 plus_constant (Pmode,
19907 stack_pointer_rtx,
19908 offset));
19909 else
19910 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19912 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19913 RTX_FRAME_RELATED_P (tmp) = 1;
19914 tmp = emit_insn (tmp);
19916 /* Record the first store insn. */
19917 if (dwarf_index == 1)
19918 insn = tmp;
19920 /* Generate dwarf info. */
19921 mem = gen_frame_mem (SImode,
19922 plus_constant (Pmode,
19923 stack_pointer_rtx,
19924 offset));
19925 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19926 RTX_FRAME_RELATED_P (tmp) = 1;
19927 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19929 mem = gen_frame_mem (SImode,
19930 plus_constant (Pmode,
19931 stack_pointer_rtx,
19932 offset + 4));
19933 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19934 RTX_FRAME_RELATED_P (tmp) = 1;
19935 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19937 offset += 8;
19938 j += 2;
19940 else
19942 /* Emit a single word store. */
19943 if (offset < 0)
19945 /* Allocate stack space for all saved registers. */
19946 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19947 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19948 mem = gen_frame_mem (SImode, tmp);
19949 offset = 0;
19951 else if (offset > 0)
19952 mem = gen_frame_mem (SImode,
19953 plus_constant (Pmode,
19954 stack_pointer_rtx,
19955 offset));
19956 else
19957 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19959 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19960 RTX_FRAME_RELATED_P (tmp) = 1;
19961 tmp = emit_insn (tmp);
19963 /* Record the first store insn. */
19964 if (dwarf_index == 1)
19965 insn = tmp;
19967 /* Generate dwarf info. */
19968 mem = gen_frame_mem (SImode,
19969 plus_constant(Pmode,
19970 stack_pointer_rtx,
19971 offset));
19972 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19973 RTX_FRAME_RELATED_P (tmp) = 1;
19974 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19976 offset += 4;
19977 j += 1;
19980 else
19981 j++;
19983 /* Attach dwarf info to the first insn we generate. */
19984 gcc_assert (insn != NULL_RTX);
19985 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19986 RTX_FRAME_RELATED_P (insn) = 1;
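/* Standalone sketch (not part of arm.c) of the pairing rule used by
   arm_emit_strd_push above: ARM-mode STRD needs an even-numbered
   register together with its odd successor, so only such pairs in the
   mask are stored with STRD and everything else falls back to STR.  */

static int
arm_strd_pair_p (unsigned long mask, int regno)
{
  return (regno % 2) == 0
         && (mask & (1UL << regno)) != 0
         && (mask & (1UL << (regno + 1))) != 0;
}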
19989 /* Generate and emit an insn that we will recognize as a push_multi.
19990 Unfortunately, since this insn does not reflect very well the actual
19991 semantics of the operation, we need to annotate the insn for the benefit
19992 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19993 MASK for registers that should be annotated for DWARF2 frame unwind
19994 information. */
19995 static rtx
19996 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19998 int num_regs = 0;
19999 int num_dwarf_regs = 0;
20000 int i, j;
20001 rtx par;
20002 rtx dwarf;
20003 int dwarf_par_index;
20004 rtx tmp, reg;
20006 /* We don't record the PC in the dwarf frame information. */
20007 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20009 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20011 if (mask & (1 << i))
20012 num_regs++;
20013 if (dwarf_regs_mask & (1 << i))
20014 num_dwarf_regs++;
20017 gcc_assert (num_regs && num_regs <= 16);
20018 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20020 /* For the body of the insn we are going to generate an UNSPEC in
20021 parallel with several USEs. This allows the insn to be recognized
20022 by the push_multi pattern in the arm.md file.
20024 The body of the insn looks something like this:
20026 (parallel [
20027 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20028 (const_int:SI <num>)))
20029 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20030 (use (reg:SI XX))
20031 (use (reg:SI YY))
20035 For the frame note however, we try to be more explicit and actually
20036 show each register being stored into the stack frame, plus a (single)
20037 decrement of the stack pointer. We do it this way in order to be
20038 friendly to the stack unwinding code, which only wants to see a single
20039 stack decrement per instruction. The RTL we generate for the note looks
20040 something like this:
20042 (sequence [
20043 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20044 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20045 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20046 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20050 FIXME: In an ideal world the PRE_MODIFY would not exist and
20051 instead we'd have a parallel expression detailing all
20052 the stores to the various memory addresses so that debug
20053 information is more up-to-date. Remember however while writing
20054 this to take care of the constraints with the push instruction.
20056 Note also that this has to be taken care of for the VFP registers.
20058 For more see PR43399. */
20060 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20061 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20062 dwarf_par_index = 1;
20064 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20066 if (mask & (1 << i))
20068 reg = gen_rtx_REG (SImode, i);
20070 XVECEXP (par, 0, 0)
20071 = gen_rtx_SET (VOIDmode,
20072 gen_frame_mem
20073 (BLKmode,
20074 gen_rtx_PRE_MODIFY (Pmode,
20075 stack_pointer_rtx,
20076 plus_constant
20077 (Pmode, stack_pointer_rtx,
20078 -4 * num_regs))
20080 gen_rtx_UNSPEC (BLKmode,
20081 gen_rtvec (1, reg),
20082 UNSPEC_PUSH_MULT));
20084 if (dwarf_regs_mask & (1 << i))
20086 tmp = gen_rtx_SET (VOIDmode,
20087 gen_frame_mem (SImode, stack_pointer_rtx),
20088 reg);
20089 RTX_FRAME_RELATED_P (tmp) = 1;
20090 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20093 break;
20097 for (j = 1, i++; j < num_regs; i++)
20099 if (mask & (1 << i))
20101 reg = gen_rtx_REG (SImode, i);
20103 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20105 if (dwarf_regs_mask & (1 << i))
20107 tmp
20108 = gen_rtx_SET (VOIDmode,
20109 gen_frame_mem
20110 (SImode,
20111 plus_constant (Pmode, stack_pointer_rtx,
20112 4 * j)),
20113 reg);
20114 RTX_FRAME_RELATED_P (tmp) = 1;
20115 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20118 j++;
20122 par = emit_insn (par);
20124 tmp = gen_rtx_SET (VOIDmode,
20125 stack_pointer_rtx,
20126 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20127 RTX_FRAME_RELATED_P (tmp) = 1;
20128 XVECEXP (dwarf, 0, 0) = tmp;
20130 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20132 return par;
20135 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20136 SIZE is the offset to be adjusted.
20137 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20138 static void
20139 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20141 rtx dwarf;
20143 RTX_FRAME_RELATED_P (insn) = 1;
20144 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20145 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20148 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20149 SAVED_REGS_MASK shows which registers need to be restored.
20151 Unfortunately, since this insn does not reflect very well the actual
20152 semantics of the operation, we need to annotate the insn for the benefit
20153 of DWARF2 frame unwind information. */
20154 static void
20155 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20157 int num_regs = 0;
20158 int i, j;
20159 rtx par;
20160 rtx dwarf = NULL_RTX;
20161 rtx tmp, reg;
20162 bool return_in_pc;
20163 int offset_adj;
20164 int emit_update;
20166 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20167 offset_adj = return_in_pc ? 1 : 0;
20168 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20169 if (saved_regs_mask & (1 << i))
20170 num_regs++;
20172 gcc_assert (num_regs && num_regs <= 16);
20174 /* If SP is in the reglist, then we don't emit the SP update insn. */
20175 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20177 /* The parallel needs to hold num_regs SETs
20178 and one SET for the stack update. */
20179 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20181 if (return_in_pc)
20183 tmp = ret_rtx;
20184 XVECEXP (par, 0, 0) = tmp;
20187 if (emit_update)
20189 /* Increment the stack pointer, based on there being
20190 num_regs 4-byte registers to restore. */
20191 tmp = gen_rtx_SET (VOIDmode,
20192 stack_pointer_rtx,
20193 plus_constant (Pmode,
20194 stack_pointer_rtx,
20195 4 * num_regs));
20196 RTX_FRAME_RELATED_P (tmp) = 1;
20197 XVECEXP (par, 0, offset_adj) = tmp;
20200 /* Now restore every reg, which may include PC. */
20201 for (j = 0, i = 0; j < num_regs; i++)
20202 if (saved_regs_mask & (1 << i))
20204 reg = gen_rtx_REG (SImode, i);
20205 if ((num_regs == 1) && emit_update && !return_in_pc)
20207 /* Emit single load with writeback. */
20208 tmp = gen_frame_mem (SImode,
20209 gen_rtx_POST_INC (Pmode,
20210 stack_pointer_rtx));
20211 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20212 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20213 return;
20216 tmp = gen_rtx_SET (VOIDmode,
20217 reg,
20218 gen_frame_mem
20219 (SImode,
20220 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20221 RTX_FRAME_RELATED_P (tmp) = 1;
20222 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20224 /* We need to maintain a sequence for DWARF info too. As the dwarf info
20225 should not include the PC, skip it. */
20226 if (i != PC_REGNUM)
20227 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20229 j++;
20232 if (return_in_pc)
20233 par = emit_jump_insn (par);
20234 else
20235 par = emit_insn (par);
20237 REG_NOTES (par) = dwarf;
20238 if (!return_in_pc)
20239 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20240 stack_pointer_rtx, stack_pointer_rtx);
20243 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20244 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20246 Unfortunately, since this insn does not reflect very well the actual
20247 semantics of the operation, we need to annotate the insn for the benefit
20248 of DWARF2 frame unwind information. */
20249 static void
20250 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20252 int i, j;
20253 rtx par;
20254 rtx dwarf = NULL_RTX;
20255 rtx tmp, reg;
20257 gcc_assert (num_regs && num_regs <= 32);
20259 /* Workaround ARM10 VFPr1 bug. */
20260 if (num_regs == 2 && !arm_arch6)
20262 if (first_reg == 15)
20263 first_reg--;
20265 num_regs++;
20268 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20269 there could be up to 32 D-registers to restore.
20270 If there are more than 16 D-registers, make two recursive calls,
20271 each of which emits one pop_multi instruction. */
20272 if (num_regs > 16)
20274 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20275 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20276 return;
20279 /* The parallel needs to hold num_regs SETs
20280 and one SET for the stack update. */
20281 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20283 /* Increment the stack pointer, based on there being
20284 num_regs 8-byte registers to restore. */
20285 tmp = gen_rtx_SET (VOIDmode,
20286 base_reg,
20287 plus_constant (Pmode, base_reg, 8 * num_regs));
20288 RTX_FRAME_RELATED_P (tmp) = 1;
20289 XVECEXP (par, 0, 0) = tmp;
20291 /* Now show every reg that will be restored, using a SET for each. */
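/* Each DFmode register spans two consecutive VFP register numbers, so i
   advances by 2 per D register while j counts the registers emitted. */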
20292 for (j = 0, i=first_reg; j < num_regs; i += 2)
20294 reg = gen_rtx_REG (DFmode, i);
20296 tmp = gen_rtx_SET (VOIDmode,
20297 reg,
20298 gen_frame_mem
20299 (DFmode,
20300 plus_constant (Pmode, base_reg, 8 * j)));
20301 RTX_FRAME_RELATED_P (tmp) = 1;
20302 XVECEXP (par, 0, j + 1) = tmp;
20304 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20306 j++;
20309 par = emit_insn (par);
20310 REG_NOTES (par) = dwarf;
20312 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from the FP. */
20313 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20315 RTX_FRAME_RELATED_P (par) = 1;
20316 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20318 else
20319 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20320 base_reg, base_reg);
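/* Standalone sketch (not part of arm.c) of the recursion above: a pop
   of more than 16 D-registers is split into chunks of at most 16.
   For simplicity the sketch counts in D registers, whereas the
   function above counts in VFP register numbers (two per D register);
   print_vfp_pop_plan is a made-up name.  */

#include <stdio.h>

static void
print_vfp_pop_plan (int first_dreg, int num_dregs)
{
  while (num_dregs > 0)
    {
      int chunk = num_dregs > 16 ? 16 : num_dregs;
      printf ("pop of d%d-d%d (%d bytes)\n",
              first_dreg, first_dreg + chunk - 1, 8 * chunk);
      first_dreg += chunk;
      num_dregs -= chunk;
    }
}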
20323 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an even
20324 number of registers is being popped, multiple LDRD patterns are created for
20325 all register pairs. If an odd number of registers is popped, the last register is
20326 loaded using an LDR pattern. */
20327 static void
20328 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20330 int num_regs = 0;
20331 int i, j;
20332 rtx par = NULL_RTX;
20333 rtx dwarf = NULL_RTX;
20334 rtx tmp, reg, tmp1;
20335 bool return_in_pc;
20337 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20338 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20339 if (saved_regs_mask & (1 << i))
20340 num_regs++;
20342 gcc_assert (num_regs && num_regs <= 16);
20344 /* We cannot generate an ldrd for PC. Hence, reduce the count if PC is
20345 to be popped. So, if num_regs was even, it now becomes odd,
20346 and we can generate a pop with PC. If num_regs was odd, it is now
20347 even, and an ldr with return can be generated for PC. */
20348 if (return_in_pc)
20349 num_regs--;
20351 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20353 /* Var j iterates over all the registers to gather all the registers in
20354 saved_regs_mask. Var i gives the index of a saved register in the stack frame.
20355 A PARALLEL RTX of a register pair is created here, so that the pattern for
20356 LDRD can be matched. As PC is always the last register to be popped, and
20357 we have already decremented num_regs if PC is in the mask, we don't have to
20358 worry about PC in this loop. */
20359 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20360 if (saved_regs_mask & (1 << j))
20362 /* Create RTX for memory load. */
20363 reg = gen_rtx_REG (SImode, j);
20364 tmp = gen_rtx_SET (SImode,
20365 reg,
20366 gen_frame_mem (SImode,
20367 plus_constant (Pmode,
20368 stack_pointer_rtx, 4 * i)));
20369 RTX_FRAME_RELATED_P (tmp) = 1;
20371 if (i % 2 == 0)
20373 /* When saved-register index (i) is even, the RTX to be emitted is
20374 yet to be created. Hence create it first. The LDRD pattern we
20375 are generating is :
20376 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20377 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20378 where target registers need not be consecutive. */
20379 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20380 dwarf = NULL_RTX;
20383 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20384 added as 0th element and if i is odd, reg_i is added as 1st element
20385 of LDRD pattern shown above. */
20386 XVECEXP (par, 0, (i % 2)) = tmp;
20387 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20389 if ((i % 2) == 1)
20391 /* When saved-register index (i) is odd, RTXs for both the registers
20392 to be loaded are generated in above given LDRD pattern, and the
20393 pattern can be emitted now. */
20394 par = emit_insn (par);
20395 REG_NOTES (par) = dwarf;
20396 RTX_FRAME_RELATED_P (par) = 1;
20399 i++;
20402 /* If the number of registers pushed is odd and return_in_pc is false, or
20403 the number of registers is even and return_in_pc is true, the last register is
20404 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20405 then LDR with post increment. */
20407 /* Increment the stack pointer, based on there being
20408 num_regs 4-byte registers to restore. */
20409 tmp = gen_rtx_SET (VOIDmode,
20410 stack_pointer_rtx,
20411 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20412 RTX_FRAME_RELATED_P (tmp) = 1;
20413 tmp = emit_insn (tmp);
20414 if (!return_in_pc)
20416 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20417 stack_pointer_rtx, stack_pointer_rtx);
20420 dwarf = NULL_RTX;
20422 if (((num_regs % 2) == 1 && !return_in_pc)
20423 || ((num_regs % 2) == 0 && return_in_pc))
20425 /* Scan for the single register to be popped. Skip until the saved
20426 register is found. */
20427 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20429 /* Gen LDR with post increment here. */
20430 tmp1 = gen_rtx_MEM (SImode,
20431 gen_rtx_POST_INC (SImode,
20432 stack_pointer_rtx));
20433 set_mem_alias_set (tmp1, get_frame_alias_set ());
20435 reg = gen_rtx_REG (SImode, j);
20436 tmp = gen_rtx_SET (SImode, reg, tmp1);
20437 RTX_FRAME_RELATED_P (tmp) = 1;
20438 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20440 if (return_in_pc)
20442 /* If return_in_pc, j must be PC_REGNUM. */
20443 gcc_assert (j == PC_REGNUM);
20444 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20445 XVECEXP (par, 0, 0) = ret_rtx;
20446 XVECEXP (par, 0, 1) = tmp;
20447 par = emit_jump_insn (par);
20449 else
20451 par = emit_insn (tmp);
20452 REG_NOTES (par) = dwarf;
20453 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20454 stack_pointer_rtx, stack_pointer_rtx);
20458 else if ((num_regs % 2) == 1 && return_in_pc)
20460 /* There are 2 registers to be popped. So, generate the pattern
20461 pop_multiple_with_stack_update_and_return to pop in PC. */
20462 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20465 return;
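/* Standalone sketch (not part of arm.c) of the tail handling decided
   in thumb2_emit_ldrd_pop above, written as a pure function of its two
   inputs.  num_regs is the count after PC has already been removed
   from it, exactly as in the code above.  */

static const char *
ldrd_pop_tail_kind (int num_regs, int return_in_pc)
{
  if ((num_regs % 2 == 1 && !return_in_pc)
      || (num_regs % 2 == 0 && return_in_pc))
    return "single ldr with post-increment (possibly into pc)";

  if (num_regs % 2 == 1 && return_in_pc)
    return "two-register pop_multi ending in pc";

  return "nothing: every register was popped in an ldrd pair";
}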
20468 /* LDRD in ARM mode needs consecutive registers as operands. This function
20469 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20470 offset addressing and then generates one separate stack update. This provides
20471 more scheduling freedom, compared to writeback on every load. However,
20472 if the function returns using load into PC directly
20473 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20474 before the last load. TODO: Add a peephole optimization to recognize
20475 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20476 peephole optimization to merge the load at stack-offset zero
20477 with the stack update instruction using load with writeback
20478 in post-index addressing mode. */
20479 static void
20480 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20482 int j = 0;
20483 int offset = 0;
20484 rtx par = NULL_RTX;
20485 rtx dwarf = NULL_RTX;
20486 rtx tmp, mem;
20488 /* Restore saved registers. */
20489 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20490 j = 0;
20491 while (j <= LAST_ARM_REGNUM)
20492 if (saved_regs_mask & (1 << j))
20494 if ((j % 2) == 0
20495 && (saved_regs_mask & (1 << (j + 1)))
20496 && (j + 1) != PC_REGNUM)
20498 /* Current register and next register form register pair for which
20499 LDRD can be generated. PC is always the last register popped, and
20500 we handle it separately. */
20501 if (offset > 0)
20502 mem = gen_frame_mem (DImode,
20503 plus_constant (Pmode,
20504 stack_pointer_rtx,
20505 offset));
20506 else
20507 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20509 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20510 tmp = emit_insn (tmp);
20511 RTX_FRAME_RELATED_P (tmp) = 1;
20513 /* Generate dwarf info. */
20515 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20516 gen_rtx_REG (SImode, j),
20517 NULL_RTX);
20518 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20519 gen_rtx_REG (SImode, j + 1),
20520 dwarf);
20522 REG_NOTES (tmp) = dwarf;
20524 offset += 8;
20525 j += 2;
20527 else if (j != PC_REGNUM)
20529 /* Emit a single word load. */
20530 if (offset > 0)
20531 mem = gen_frame_mem (SImode,
20532 plus_constant (Pmode,
20533 stack_pointer_rtx,
20534 offset));
20535 else
20536 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20538 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20539 tmp = emit_insn (tmp);
20540 RTX_FRAME_RELATED_P (tmp) = 1;
20542 /* Generate dwarf info. */
20543 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20544 gen_rtx_REG (SImode, j),
20545 NULL_RTX);
20547 offset += 4;
20548 j += 1;
20550 else /* j == PC_REGNUM */
20551 j++;
20553 else
20554 j++;
20556 /* Update the stack. */
20557 if (offset > 0)
20559 tmp = gen_rtx_SET (Pmode,
20560 stack_pointer_rtx,
20561 plus_constant (Pmode,
20562 stack_pointer_rtx,
20563 offset));
20564 tmp = emit_insn (tmp);
20565 arm_add_cfa_adjust_cfa_note (tmp, offset,
20566 stack_pointer_rtx, stack_pointer_rtx);
20567 offset = 0;
20570 if (saved_regs_mask & (1 << PC_REGNUM))
20572 /* Only PC is to be popped. */
20573 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20574 XVECEXP (par, 0, 0) = ret_rtx;
20575 tmp = gen_rtx_SET (SImode,
20576 gen_rtx_REG (SImode, PC_REGNUM),
20577 gen_frame_mem (SImode,
20578 gen_rtx_POST_INC (SImode,
20579 stack_pointer_rtx)));
20580 RTX_FRAME_RELATED_P (tmp) = 1;
20581 XVECEXP (par, 0, 1) = tmp;
20582 par = emit_jump_insn (par);
20584 /* Generate dwarf info. */
20585 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20586 gen_rtx_REG (SImode, PC_REGNUM),
20587 NULL_RTX);
20588 REG_NOTES (par) = dwarf;
20589 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20590 stack_pointer_rtx, stack_pointer_rtx);
20594 /* Calculate the size of the return value that is passed in registers. */
20595 static unsigned
20596 arm_size_return_regs (void)
20598 machine_mode mode;
20600 if (crtl->return_rtx != 0)
20601 mode = GET_MODE (crtl->return_rtx);
20602 else
20603 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20605 return GET_MODE_SIZE (mode);
20608 /* Return true if the current function needs to save/restore LR. */
20609 static bool
20610 thumb_force_lr_save (void)
20612 return !cfun->machine->lr_save_eliminated
20613 && (!leaf_function_p ()
20614 || thumb_far_jump_used_p ()
20615 || df_regs_ever_live_p (LR_REGNUM));
20618 /* We cannot tell whether r3 will be available, because
20619 there is an indirect tail call happening in this
20620 particular case. */
20621 static bool
20622 is_indirect_tailcall_p (rtx call)
20624 rtx pat = PATTERN (call);
20626 /* Indirect tail call. */
20627 pat = XVECEXP (pat, 0, 0);
20628 if (GET_CODE (pat) == SET)
20629 pat = SET_SRC (pat);
20631 pat = XEXP (XEXP (pat, 0), 0);
20632 return REG_P (pat);
20635 /* Return true if r3 is used by any of the tail call insns in the
20636 current function. */
20637 static bool
20638 any_sibcall_could_use_r3 (void)
20640 edge_iterator ei;
20641 edge e;
20643 if (!crtl->tail_call_emit)
20644 return false;
20645 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20646 if (e->flags & EDGE_SIBCALL)
20648 rtx call = BB_END (e->src);
20649 if (!CALL_P (call))
20650 call = prev_nonnote_nondebug_insn (call);
20651 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20652 if (find_regno_fusage (call, USE, 3)
20653 || is_indirect_tailcall_p (call))
20654 return true;
20656 return false;
20660 /* Compute the distance from register FROM to register TO.
20661 These can be the arg pointer (26), the soft frame pointer (25),
20662 the stack pointer (13) or the hard frame pointer (11).
20663 In Thumb mode r7 is used as the hard frame pointer, if needed.
20664 Typical stack layout looks like this:
20666 old stack pointer -> | |
20667 ----
20668 | | \
20669 | | saved arguments for
20670 | | vararg functions
20671 | | /
20673 hard FP & arg pointer -> | | \
20674 | | stack
20675 | | frame
20676 | | /
20678 | | \
20679 | | call saved
20680 | | registers
20681 soft frame pointer -> | | /
20683 | | \
20684 | | local
20685 | | variables
20686 locals base pointer -> | | /
20688 | | \
20689 | | outgoing
20690 | | arguments
20691 current stack pointer -> | | /
20694 For a given function some or all of these stack components
20695 may not be needed, giving rise to the possibility of
20696 eliminating some of the registers.
20698 The values returned by this function must reflect the behavior
20699 of arm_expand_prologue() and arm_compute_save_reg_mask().
20701 The sign of the number returned reflects the direction of stack
20702 growth, so the values are positive for all eliminations except
20703 from the soft frame pointer to the hard frame pointer.
20705 SFP may point just inside the local variables block to ensure correct
20706 alignment. */
20709 /* Calculate stack offsets. These are used to calculate register elimination
20710 offsets and in prologue/epilogue code. Also calculates which registers
20711 should be saved. */
20713 static arm_stack_offsets *
20714 arm_get_frame_offsets (void)
20716 struct arm_stack_offsets *offsets;
20717 unsigned long func_type;
20718 int leaf;
20719 int saved;
20720 int core_saved;
20721 HOST_WIDE_INT frame_size;
20722 int i;
20724 offsets = &cfun->machine->stack_offsets;
20726 /* We need to know if we are a leaf function. Unfortunately, it
20727 is possible to be called after start_sequence has been called,
20728 which causes get_insns to return the insns for the sequence,
20729 not the function, which will cause leaf_function_p to return
20730 the incorrect result.
20732 Fortunately, this only matters before reload has completed, and the
20733 frame size cannot be changed after that time, so we can safely
20734 use the cached value. */
20736 if (reload_completed)
20737 return offsets;
20739 /* Initially this is the size of the local variables. It will be translated
20740 into an offset once we have determined the size of preceding data. */
20741 frame_size = ROUND_UP_WORD (get_frame_size ());
20743 leaf = leaf_function_p ();
20745 /* Space for variadic functions. */
20746 offsets->saved_args = crtl->args.pretend_args_size;
20748 /* In Thumb mode this is incorrect, but never used. */
20749 offsets->frame
20750 = (offsets->saved_args
20751 + arm_compute_static_chain_stack_bytes ()
20752 + (frame_pointer_needed ? 4 : 0));
20754 if (TARGET_32BIT)
20756 unsigned int regno;
20758 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20759 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20760 saved = core_saved;
20762 /* We know that SP will be doubleword aligned on entry, and we must
20763 preserve that condition at any subroutine call. We also require the
20764 soft frame pointer to be doubleword aligned. */
20766 if (TARGET_REALLY_IWMMXT)
20768 /* Check for the call-saved iWMMXt registers. */
20769 for (regno = FIRST_IWMMXT_REGNUM;
20770 regno <= LAST_IWMMXT_REGNUM;
20771 regno++)
20772 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20773 saved += 8;
20776 func_type = arm_current_func_type ();
20777 /* Space for saved VFP registers. */
20778 if (! IS_VOLATILE (func_type)
20779 && TARGET_HARD_FLOAT && TARGET_VFP)
20780 saved += arm_get_vfp_saved_size ();
20782 else /* TARGET_THUMB1 */
20784 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20785 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20786 saved = core_saved;
20787 if (TARGET_BACKTRACE)
20788 saved += 16;
20791 /* Saved registers include the stack frame. */
20792 offsets->saved_regs
20793 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20794 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20796 /* A leaf function does not need any stack alignment if it has nothing
20797 on the stack. */
20798 if (leaf && frame_size == 0
20799 /* However if it calls alloca(), we have a dynamically allocated
20800 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20801 && ! cfun->calls_alloca)
20803 offsets->outgoing_args = offsets->soft_frame;
20804 offsets->locals_base = offsets->soft_frame;
20805 return offsets;
20808 /* Ensure SFP has the correct alignment. */
20809 if (ARM_DOUBLEWORD_ALIGN
20810 && (offsets->soft_frame & 7))
20812 offsets->soft_frame += 4;
20813 /* Try to align stack by pushing an extra reg. Don't bother doing this
20814 when there is a stack frame as the alignment will be rolled into
20815 the normal stack adjustment. */
20816 if (frame_size + crtl->outgoing_args_size == 0)
20818 int reg = -1;
20820 /* Register r3 is caller-saved. Normally it does not need to be
20821 saved on entry by the prologue. However if we choose to save
20822 it for padding then we may confuse the compiler into thinking
20823 a prologue sequence is required when in fact it is not. This
20824 will occur when shrink-wrapping if r3 is used as a scratch
20825 register and there are no other callee-saved writes.
20827 This situation can be avoided when other callee-saved registers
20828 are available and r3 is not mandatory if we choose a callee-saved
20829 register for padding. */
20830 bool prefer_callee_reg_p = false;
20832 /* If it is safe to use r3, then do so. This sometimes
20833 generates better code on Thumb-2 by avoiding the need to
20834 use 32-bit push/pop instructions. */
20835 if (! any_sibcall_could_use_r3 ()
20836 && arm_size_return_regs () <= 12
20837 && (offsets->saved_regs_mask & (1 << 3)) == 0
20838 && (TARGET_THUMB2
20839 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20841 reg = 3;
20842 if (!TARGET_THUMB2)
20843 prefer_callee_reg_p = true;
20845 if (reg == -1
20846 || prefer_callee_reg_p)
20848 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20850 /* Avoid fixed registers; they may be changed at
20851 arbitrary times so it's unsafe to restore them
20852 during the epilogue. */
20853 if (!fixed_regs[i]
20854 && (offsets->saved_regs_mask & (1 << i)) == 0)
20856 reg = i;
20857 break;
20862 if (reg != -1)
20864 offsets->saved_regs += 4;
20865 offsets->saved_regs_mask |= (1 << reg);
20870 offsets->locals_base = offsets->soft_frame + frame_size;
20871 offsets->outgoing_args = (offsets->locals_base
20872 + crtl->outgoing_args_size);
20874 if (ARM_DOUBLEWORD_ALIGN)
20876 /* Ensure SP remains doubleword aligned. */
20877 if (offsets->outgoing_args & 7)
20878 offsets->outgoing_args += 4;
20879 gcc_assert (!(offsets->outgoing_args & 7));
20882 return offsets;
20886 /* Calculate the relative offsets for the different stack pointers. Positive
20887 offsets are in the direction of stack growth. */
20889 HOST_WIDE_INT
20890 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20892 arm_stack_offsets *offsets;
20894 offsets = arm_get_frame_offsets ();
20896 /* OK, now we have enough information to compute the distances.
20897 There must be an entry in these switch tables for each pair
20898 of registers in ELIMINABLE_REGS, even if some of the entries
20899 seem to be redundant or useless. */
20900 switch (from)
20902 case ARG_POINTER_REGNUM:
20903 switch (to)
20905 case THUMB_HARD_FRAME_POINTER_REGNUM:
20906 return 0;
20908 case FRAME_POINTER_REGNUM:
20909 /* This is the reverse of the soft frame pointer
20910 to hard frame pointer elimination below. */
20911 return offsets->soft_frame - offsets->saved_args;
20913 case ARM_HARD_FRAME_POINTER_REGNUM:
20914 /* This is only non-zero in the case where the static chain register
20915 is stored above the frame. */
20916 return offsets->frame - offsets->saved_args - 4;
20918 case STACK_POINTER_REGNUM:
20919 /* If nothing has been pushed on the stack at all
20920 then this will return -4. This *is* correct! */
20921 return offsets->outgoing_args - (offsets->saved_args + 4);
20923 default:
20924 gcc_unreachable ();
20926 gcc_unreachable ();
20928 case FRAME_POINTER_REGNUM:
20929 switch (to)
20931 case THUMB_HARD_FRAME_POINTER_REGNUM:
20932 return 0;
20934 case ARM_HARD_FRAME_POINTER_REGNUM:
20935 /* The hard frame pointer points to the top entry in the
20936 stack frame. The soft frame pointer points to the bottom entry
20937 in the stack frame. If there is no stack frame at all,
20938 then they are identical. */
20940 return offsets->frame - offsets->soft_frame;
20942 case STACK_POINTER_REGNUM:
20943 return offsets->outgoing_args - offsets->soft_frame;
20945 default:
20946 gcc_unreachable ();
20948 gcc_unreachable ();
20950 default:
20951 /* You cannot eliminate from the stack pointer.
20952 In theory you could eliminate from the hard frame
20953 pointer to the stack pointer, but this will never
20954 happen, since if a stack frame is not needed the
20955 hard frame pointer will never be used. */
20956 gcc_unreachable ();
20960 /* Given FROM and TO register numbers, say whether this elimination is
20961 allowed. Frame pointer elimination is automatically handled.
20963 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20964 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20965 pointer, we must eliminate FRAME_POINTER_REGNUM into
20966 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20967 ARG_POINTER_REGNUM. */
20969 bool
20970 arm_can_eliminate (const int from, const int to)
20972 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20973 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20974 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20975 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20976 true);
20979 /* Emit RTL to save coprocessor registers on function entry. Returns the
20980 number of bytes pushed. */
20982 static int
20983 arm_save_coproc_regs(void)
20985 int saved_size = 0;
20986 unsigned reg;
20987 unsigned start_reg;
20988 rtx insn;
20990 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20991 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20993 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20994 insn = gen_rtx_MEM (V2SImode, insn);
20995 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20996 RTX_FRAME_RELATED_P (insn) = 1;
20997 saved_size += 8;
21000 if (TARGET_HARD_FLOAT && TARGET_VFP)
21002 start_reg = FIRST_VFP_REGNUM;
21004 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21006 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21007 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21009 if (start_reg != reg)
21010 saved_size += vfp_emit_fstmd (start_reg,
21011 (reg - start_reg) / 2);
21012 start_reg = reg + 2;
21015 if (start_reg != reg)
21016 saved_size += vfp_emit_fstmd (start_reg,
21017 (reg - start_reg) / 2);
21019 return saved_size;
21023 /* Set the Thumb frame pointer from the stack pointer. */
21025 static void
21026 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21028 HOST_WIDE_INT amount;
21029 rtx insn, dwarf;
21031 amount = offsets->outgoing_args - offsets->locals_base;
21032 if (amount < 1024)
21033 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21034 stack_pointer_rtx, GEN_INT (amount)));
21035 else
21037 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21038 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21039 expects the first two operands to be the same. */
21040 if (TARGET_THUMB2)
21042 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21043 stack_pointer_rtx,
21044 hard_frame_pointer_rtx));
21046 else
21048 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21049 hard_frame_pointer_rtx,
21050 stack_pointer_rtx));
21052 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21053 plus_constant (Pmode, stack_pointer_rtx, amount));
21054 RTX_FRAME_RELATED_P (dwarf) = 1;
21055 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21058 RTX_FRAME_RELATED_P (insn) = 1;
21061 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21062 function. */
21063 void
21064 arm_expand_prologue (void)
21066 rtx amount;
21067 rtx insn;
21068 rtx ip_rtx;
21069 unsigned long live_regs_mask;
21070 unsigned long func_type;
21071 int fp_offset = 0;
21072 int saved_pretend_args = 0;
21073 int saved_regs = 0;
21074 unsigned HOST_WIDE_INT args_to_push;
21075 arm_stack_offsets *offsets;
21077 func_type = arm_current_func_type ();
21079 /* Naked functions don't have prologues. */
21080 if (IS_NAKED (func_type))
21081 return;
21083 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21084 args_to_push = crtl->args.pretend_args_size;
21086 /* Compute which register we will have to save onto the stack. */
21087 offsets = arm_get_frame_offsets ();
21088 live_regs_mask = offsets->saved_regs_mask;
21090 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21092 if (IS_STACKALIGN (func_type))
21094 rtx r0, r1;
21096 /* Handle a word-aligned stack pointer. We generate the following:
21098 mov r0, sp
21099 bic r1, r0, #7
21100 mov sp, r1
21101 <save and restore r0 in normal prologue/epilogue>
21102 mov sp, r0
21103 bx lr
21105 The unwinder doesn't need to know about the stack realignment.
21106 Just tell it we saved SP in r0. */
21107 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21109 r0 = gen_rtx_REG (SImode, 0);
21110 r1 = gen_rtx_REG (SImode, 1);
21112 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21113 RTX_FRAME_RELATED_P (insn) = 1;
21114 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21116 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21118 /* ??? The CFA changes here, which may cause GDB to conclude that it
21119 has entered a different function. That said, the unwind info is
21120 correct, individually, before and after this instruction because
21121 we've described the save of SP, which will override the default
21122 handling of SP as restoring from the CFA. */
21123 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21126 /* For APCS frames, if the IP register is clobbered
21127 when creating the frame, save that register in a special
21128 way. */
21129 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21131 if (IS_INTERRUPT (func_type))
21133 /* Interrupt functions must not corrupt any registers.
21134 Creating a frame pointer however, corrupts the IP
21135 register, so we must push it first. */
21136 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21138 /* Do not set RTX_FRAME_RELATED_P on this insn.
21139 The dwarf stack unwinding code only wants to see one
21140 stack decrement per function, and this is not it. If
21141 this instruction is labeled as being part of the frame
21142 creation sequence then dwarf2out_frame_debug_expr will
21143 die when it encounters the assignment of IP to FP
21144 later on, since the use of SP here establishes SP as
21145 the CFA register and not IP.
21147 Anyway this instruction is not really part of the stack
21148 frame creation although it is part of the prologue. */
21150 else if (IS_NESTED (func_type))
21152 /* The static chain register is the same as the IP register
21153 used as a scratch register during stack frame creation.
21154 To get around this we need to find somewhere to store IP
21155 whilst the frame is being created. We try the following
21156 places in order:
21158 1. The last argument register r3 if it is available.
21159 2. A slot on the stack above the frame if there are no
21160 arguments to push onto the stack.
21161 3. Register r3 again, after pushing the argument registers
21162 onto the stack, if this is a varargs function.
21163 4. The last slot on the stack created for the arguments to
21164 push, if this isn't a varargs function.
21166 Note - we only need to tell the dwarf2 backend about the SP
21167 adjustment in the second variant; the static chain register
21168 doesn't need to be unwound, as it doesn't contain a value
21169 inherited from the caller. */
21171 if (!arm_r3_live_at_start_p ())
21172 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21173 else if (args_to_push == 0)
21175 rtx addr, dwarf;
21177 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21178 saved_regs += 4;
21180 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21181 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21182 fp_offset = 4;
21184 /* Just tell the dwarf backend that we adjusted SP. */
21185 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21186 plus_constant (Pmode, stack_pointer_rtx,
21187 -fp_offset));
21188 RTX_FRAME_RELATED_P (insn) = 1;
21189 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21191 else
21193 /* Store the args on the stack. */
21194 if (cfun->machine->uses_anonymous_args)
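/* (0xf0 >> (args_to_push / 4)) & 0xf selects the highest argument
   registers: e.g. args_to_push == 8 (two anonymous-arg words) gives
   (0xf0 >> 2) & 0xf == 0xc, the mask for r2 and r3. */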
21196 insn
21197 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21198 (0xf0 >> (args_to_push / 4)) & 0xf);
21199 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21200 saved_pretend_args = 1;
21202 else
21204 rtx addr, dwarf;
21206 if (args_to_push == 4)
21207 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21208 else
21209 addr
21210 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21211 plus_constant (Pmode,
21212 stack_pointer_rtx,
21213 -args_to_push));
21215 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21217 /* Just tell the dwarf backend that we adjusted SP. */
21218 dwarf
21219 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21220 plus_constant (Pmode, stack_pointer_rtx,
21221 -args_to_push));
21222 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21225 RTX_FRAME_RELATED_P (insn) = 1;
21226 fp_offset = args_to_push;
21227 args_to_push = 0;
21231 insn = emit_set_insn (ip_rtx,
21232 plus_constant (Pmode, stack_pointer_rtx,
21233 fp_offset));
21234 RTX_FRAME_RELATED_P (insn) = 1;
21237 if (args_to_push)
21239 /* Push the argument registers, or reserve space for them. */
21240 if (cfun->machine->uses_anonymous_args)
21241 insn = emit_multi_reg_push
21242 ((0xf0 >> (args_to_push / 4)) & 0xf,
21243 (0xf0 >> (args_to_push / 4)) & 0xf);
21244 else
21245 insn = emit_insn
21246 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21247 GEN_INT (- args_to_push)));
21248 RTX_FRAME_RELATED_P (insn) = 1;
21251 /* If this is an interrupt service routine, and the link register
21252 is going to be pushed, and we're not generating an extra
21253 push of IP (needed when a frame is needed and the frame layout is APCS),
21254 subtracting four from LR now will mean that the function return
21255 can be done with a single instruction. */
21256 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21257 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21258 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21259 && TARGET_ARM)
21261 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21263 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21266 if (live_regs_mask)
21268 unsigned long dwarf_regs_mask = live_regs_mask;
21270 saved_regs += bit_count (live_regs_mask) * 4;
21271 if (optimize_size && !frame_pointer_needed
21272 && saved_regs == offsets->saved_regs - offsets->saved_args)
21274 /* If no coprocessor registers are being pushed and we don't have
21275 to worry about a frame pointer then push extra registers to
21276 create the stack frame. This is done in a way that does not
21277 alter the frame layout, so is independent of the epilogue. */
21278 int n;
21279 int frame;
21280 n = 0;
21281 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21282 n++;
21283 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21284 if (frame && n * 4 >= frame)
21286 n = frame / 4;
21287 live_regs_mask |= (1 << n) - 1;
21288 saved_regs += frame;
21292 if (TARGET_LDRD
21293 && current_tune->prefer_ldrd_strd
21294 && !optimize_function_for_size_p (cfun))
21296 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21297 if (TARGET_THUMB2)
21298 thumb2_emit_strd_push (live_regs_mask);
21299 else if (TARGET_ARM
21300 && !TARGET_APCS_FRAME
21301 && !IS_INTERRUPT (func_type))
21302 arm_emit_strd_push (live_regs_mask);
21303 else
21305 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21306 RTX_FRAME_RELATED_P (insn) = 1;
21309 else
21311 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21312 RTX_FRAME_RELATED_P (insn) = 1;
21316 if (! IS_VOLATILE (func_type))
21317 saved_regs += arm_save_coproc_regs ();
21319 if (frame_pointer_needed && TARGET_ARM)
21321 /* Create the new frame pointer. */
21322 if (TARGET_APCS_FRAME)
21324 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21325 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21326 RTX_FRAME_RELATED_P (insn) = 1;
21328 if (IS_NESTED (func_type))
21330 /* Recover the static chain register. */
21331 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21332 insn = gen_rtx_REG (SImode, 3);
21333 else
21335 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21336 insn = gen_frame_mem (SImode, insn);
21338 emit_set_insn (ip_rtx, insn);
21339 /* Add a USE to stop propagate_one_insn() from barfing. */
21340 emit_insn (gen_force_register_use (ip_rtx));
21343 else
21345 insn = GEN_INT (saved_regs - 4);
21346 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21347 stack_pointer_rtx, insn));
21348 RTX_FRAME_RELATED_P (insn) = 1;
21352 if (flag_stack_usage_info)
21353 current_function_static_stack_size
21354 = offsets->outgoing_args - offsets->saved_args;
21356 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21358 /* This add can produce multiple insns for a large constant, so we
21359 need to get tricky. */
21360 rtx_insn *last = get_last_insn ();
21362 amount = GEN_INT (offsets->saved_args + saved_regs
21363 - offsets->outgoing_args);
21365 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21366 amount));
21367 do
21369 last = last ? NEXT_INSN (last) : get_insns ();
21370 RTX_FRAME_RELATED_P (last) = 1;
21372 while (last != insn);
21374 /* If the frame pointer is needed, emit a special barrier that
21375 will prevent the scheduler from moving stores to the frame
21376 before the stack adjustment. */
21377 if (frame_pointer_needed)
21378 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21379 hard_frame_pointer_rtx));
21383 if (frame_pointer_needed && TARGET_THUMB2)
21384 thumb_set_frame_pointer (offsets);
21386 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21388 unsigned long mask;
21390 mask = live_regs_mask;
21391 mask &= THUMB2_WORK_REGS;
21392 if (!IS_NESTED (func_type))
21393 mask |= (1 << IP_REGNUM);
21394 arm_load_pic_register (mask);
21397 /* If we are profiling, make sure no instructions are scheduled before
21398 the call to mcount. Similarly if the user has requested no
21399 scheduling in the prolog. Similarly if we want non-call exceptions
21400 using the EABI unwinder, to prevent faulting instructions from being
21401 swapped with a stack adjustment. */
21402 if (crtl->profile || !TARGET_SCHED_PROLOG
21403 || (arm_except_unwind_info (&global_options) == UI_TARGET
21404 && cfun->can_throw_non_call_exceptions))
21405 emit_insn (gen_blockage ());
21407 /* If the link register is being kept alive, with the return address in it,
21408 then make sure that it does not get reused by the ce2 pass. */
21409 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21410 cfun->machine->lr_save_eliminated = 1;
21413 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21414 static void
21415 arm_print_condition (FILE *stream)
21417 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21419 /* Branch conversion is not implemented for Thumb-2. */
21420 if (TARGET_THUMB)
21422 output_operand_lossage ("predicated Thumb instruction");
21423 return;
21425 if (current_insn_predicate != NULL)
21427 output_operand_lossage
21428 ("predicated instruction in conditional sequence");
21429 return;
21432 fputs (arm_condition_codes[arm_current_cc], stream);
21434 else if (current_insn_predicate)
21436 enum arm_cond_code code;
21438 if (TARGET_THUMB1)
21440 output_operand_lossage ("predicated Thumb instruction");
21441 return;
21444 code = get_arm_condition_code (current_insn_predicate);
21445 fputs (arm_condition_codes[code], stream);
21450 /* Globally reserved letters: acln
21451 Punctuation letters currently used: @_|?().!#
21452 Lower case letters currently used: bcdefhimpqtvwxyz
21453 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21454 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21456 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21458 If CODE is 'd', then the X is a condition operand and the instruction
21459 should only be executed if the condition is true.
21460 If CODE is 'D', then the X is a condition operand and the instruction
21461 should only be executed if the condition is false: however, if the mode
21462 of the comparison is CCFPEmode, then always execute the instruction -- we
21463 do this because in these circumstances !GE does not necessarily imply LT;
21464 in these cases the instruction pattern will take care to make sure that
21465 an instruction containing %d will follow, thereby undoing the effects of
21466 doing this instruction unconditionally.
21467 If CODE is 'N' then X is a floating point operand that must be negated
21468 before output.
21469 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21470 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21471 static void
21472 arm_print_operand (FILE *stream, rtx x, int code)
21474 switch (code)
21476 case '@':
21477 fputs (ASM_COMMENT_START, stream);
21478 return;
21480 case '_':
21481 fputs (user_label_prefix, stream);
21482 return;
21484 case '|':
21485 fputs (REGISTER_PREFIX, stream);
21486 return;
21488 case '?':
21489 arm_print_condition (stream);
21490 return;
21492 case '(':
21493 /* Nothing in unified syntax, otherwise the current condition code. */
21494 if (!TARGET_UNIFIED_ASM)
21495 arm_print_condition (stream);
21496 break;
21498 case ')':
21499 /* The current condition code in unified syntax, otherwise nothing. */
21500 if (TARGET_UNIFIED_ASM)
21501 arm_print_condition (stream);
21502 break;
21504 case '.':
21505 /* The current condition code for a condition code setting instruction.
21506 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21507 if (TARGET_UNIFIED_ASM)
21509 fputc('s', stream);
21510 arm_print_condition (stream);
21512 else
21514 arm_print_condition (stream);
21515 fputc('s', stream);
21517 return;
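      /* Editor's note -- an illustrative example, not part of the original
	 source: assuming an insn predicated on EQ, the '.' code expands to
	 "seq" under unified syntax (flag-setting suffix first, then the
	 condition) and to "eqs" under divided syntax; with no condition
	 active it prints just "s".  */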
21519 case '!':
21520 /* If the instruction is conditionally executed then print
21521 the current condition code, otherwise print 's'. */
21522 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21523 if (current_insn_predicate)
21524 arm_print_condition (stream);
21525 else
21526 fputc('s', stream);
21527 break;
21529 /* %# is a "break" sequence. It doesn't output anything, but is used to
21530 separate e.g. operand numbers from following text, if that text consists
21531 of further digits which we don't want to be part of the operand
21532 number. */
21533 case '#':
21534 return;
21536 case 'N':
21538 REAL_VALUE_TYPE r;
21539 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21540 r = real_value_negate (&r);
21541 fprintf (stream, "%s", fp_const_from_val (&r));
21543 return;
21545 /* An integer or symbol address without a preceding # sign. */
21546 case 'c':
21547 switch (GET_CODE (x))
21549 case CONST_INT:
21550 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21551 break;
21553 case SYMBOL_REF:
21554 output_addr_const (stream, x);
21555 break;
21557 case CONST:
21558 if (GET_CODE (XEXP (x, 0)) == PLUS
21559 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21561 output_addr_const (stream, x);
21562 break;
21564 /* Fall through. */
21566 default:
21567 output_operand_lossage ("Unsupported operand for code '%c'", code);
21569 return;
21571 /* An integer that we want to print in HEX. */
21572 case 'x':
21573 switch (GET_CODE (x))
21575 case CONST_INT:
21576 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21577 break;
21579 default:
21580 output_operand_lossage ("Unsupported operand for code '%c'", code);
21582 return;
21584 case 'B':
21585 if (CONST_INT_P (x))
21587 HOST_WIDE_INT val;
21588 val = ARM_SIGN_EXTEND (~INTVAL (x));
21589 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21591 else
21593 putc ('~', stream);
21594 output_addr_const (stream, x);
21596 return;
21598 case 'b':
21599 /* Print the log2 of a CONST_INT. */
21601 HOST_WIDE_INT val;
21603 if (!CONST_INT_P (x)
21604 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21605 output_operand_lossage ("Unsupported operand for code '%c'", code);
21606 else
21607 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21609 return;
21611 case 'L':
21612 /* The low 16 bits of an immediate constant. */
21613 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21614 return;
21616 case 'i':
21617 fprintf (stream, "%s", arithmetic_instr (x, 1));
21618 return;
21620 case 'I':
21621 fprintf (stream, "%s", arithmetic_instr (x, 0));
21622 return;
21624 case 'S':
21626 HOST_WIDE_INT val;
21627 const char *shift;
21629 shift = shift_op (x, &val);
21631 if (shift)
21633 fprintf (stream, ", %s ", shift);
21634 if (val == -1)
21635 arm_print_operand (stream, XEXP (x, 1), 0);
21636 else
21637 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21640 return;
21642 /* An explanation of the 'Q', 'R' and 'H' register operands:
21644 In a pair of registers containing a DI or DF value the 'Q'
21645 operand returns the register number of the register containing
21646 the least significant part of the value. The 'R' operand returns
21647 the register number of the register containing the most
21648 significant part of the value.
21650 The 'H' operand returns the higher of the two register numbers.
21651 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21652 same as the 'Q' operand, since the most significant part of the
21653 value is held in the lower number register. The reverse is true
21654 on systems where WORDS_BIG_ENDIAN is false.
21656 The purpose of these operands is to distinguish between cases
21657 where the endian-ness of the values is important (for example
21658 when they are added together), and cases where the endian-ness
21659 is irrelevant, but the order of register operations is important.
21660 For example when loading a value from memory into a register
21661 pair, the endian-ness does not matter. Provided that the value
21662 from the lower memory address is put into the lower numbered
21663 register, and the value from the higher address is put into the
21664 higher numbered register, the load will work regardless of whether
21665 the value being loaded is big-wordian or little-wordian. The
21666 order of the two register loads can matter however, if the address
21667 of the memory location is actually held in one of the registers
21668 being overwritten by the load.
21670 The 'Q' and 'R' constraints are also available for 64-bit
21671 constants. */
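      /* Editor's note -- a worked example for illustration only: for a
	 DImode value held in the pair r0/r1 on a target where
	 WORDS_BIG_ENDIAN is false, 'Q' prints r0 (least significant word),
	 'R' prints r1 (most significant word) and 'H' prints r1.  If
	 WORDS_BIG_ENDIAN were true, 'Q' and 'H' would both print r1 while
	 'R' would print r0.  */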
21672 case 'Q':
21673 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21675 rtx part = gen_lowpart (SImode, x);
21676 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21677 return;
21680 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21682 output_operand_lossage ("invalid operand for code '%c'", code);
21683 return;
21686 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21687 return;
21689 case 'R':
21690 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21692 machine_mode mode = GET_MODE (x);
21693 rtx part;
21695 if (mode == VOIDmode)
21696 mode = DImode;
21697 part = gen_highpart_mode (SImode, mode, x);
21698 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21699 return;
21702 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21704 output_operand_lossage ("invalid operand for code '%c'", code);
21705 return;
21708 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21709 return;
21711 case 'H':
21712 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21714 output_operand_lossage ("invalid operand for code '%c'", code);
21715 return;
21718 asm_fprintf (stream, "%r", REGNO (x) + 1);
21719 return;
21721 case 'J':
21722 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21724 output_operand_lossage ("invalid operand for code '%c'", code);
21725 return;
21728 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21729 return;
21731 case 'K':
21732 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21734 output_operand_lossage ("invalid operand for code '%c'", code);
21735 return;
21738 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21739 return;
21741 case 'm':
21742 asm_fprintf (stream, "%r",
21743 REG_P (XEXP (x, 0))
21744 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21745 return;
21747 case 'M':
21748 asm_fprintf (stream, "{%r-%r}",
21749 REGNO (x),
21750 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21751 return;
21753 /* Like 'M', but writing doubleword vector registers, for use by Neon
21754 insns. */
21755 case 'h':
21757 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21758 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21759 if (numregs == 1)
21760 asm_fprintf (stream, "{d%d}", regno);
21761 else
21762 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21764 return;
21766 case 'd':
21767 /* CONST_TRUE_RTX means always -- that's the default. */
21768 if (x == const_true_rtx)
21769 return;
21771 if (!COMPARISON_P (x))
21773 output_operand_lossage ("invalid operand for code '%c'", code);
21774 return;
21777 fputs (arm_condition_codes[get_arm_condition_code (x)],
21778 stream);
21779 return;
21781 case 'D':
21782 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21783 want to do that. */
21784 if (x == const_true_rtx)
21786 output_operand_lossage ("instruction never executed");
21787 return;
21789 if (!COMPARISON_P (x))
21791 output_operand_lossage ("invalid operand for code '%c'", code);
21792 return;
21795 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21796 (get_arm_condition_code (x))],
21797 stream);
21798 return;
21800 case 's':
21801 case 'V':
21802 case 'W':
21803 case 'X':
21804 case 'Y':
21805 case 'Z':
21806 /* Former Maverick support, removed after GCC-4.7. */
21807 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21808 return;
21810 case 'U':
21811 if (!REG_P (x)
21812 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21813 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21814 /* Bad value for wCG register number. */
21816 output_operand_lossage ("invalid operand for code '%c'", code);
21817 return;
21820 else
21821 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21822 return;
21824 /* Print an iWMMXt control register name. */
21825 case 'w':
21826 if (!CONST_INT_P (x)
21827 || INTVAL (x) < 0
21828 || INTVAL (x) >= 16)
21829 /* Bad value for wC register number. */
21831 output_operand_lossage ("invalid operand for code '%c'", code);
21832 return;
21835 else
21837 static const char * wc_reg_names [16] =
21839 "wCID", "wCon", "wCSSF", "wCASF",
21840 "wC4", "wC5", "wC6", "wC7",
21841 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21842 "wC12", "wC13", "wC14", "wC15"
21845 fputs (wc_reg_names [INTVAL (x)], stream);
21847 return;
21849 /* Print the high single-precision register of a VFP double-precision
21850 register. */
21851 case 'p':
21853 machine_mode mode = GET_MODE (x);
21854 int regno;
21856 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21858 output_operand_lossage ("invalid operand for code '%c'", code);
21859 return;
21862 regno = REGNO (x);
21863 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21865 output_operand_lossage ("invalid operand for code '%c'", code);
21866 return;
21869 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21871 return;
21873 /* Print a VFP/Neon double precision or quad precision register name. */
21874 case 'P':
21875 case 'q':
21877 machine_mode mode = GET_MODE (x);
21878 int is_quad = (code == 'q');
21879 int regno;
21881 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21883 output_operand_lossage ("invalid operand for code '%c'", code);
21884 return;
21887 if (!REG_P (x)
21888 || !IS_VFP_REGNUM (REGNO (x)))
21890 output_operand_lossage ("invalid operand for code '%c'", code);
21891 return;
21894 regno = REGNO (x);
21895 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21896 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21898 output_operand_lossage ("invalid operand for code '%c'", code);
21899 return;
21902 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21903 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21905 return;
21907 /* These two codes print the low/high doubleword register of a Neon quad
21908 register, respectively. For pair-structure types, can also print
21909 low/high quadword registers. */
21910 case 'e':
21911 case 'f':
21913 machine_mode mode = GET_MODE (x);
21914 int regno;
21916 if ((GET_MODE_SIZE (mode) != 16
21917 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21919 output_operand_lossage ("invalid operand for code '%c'", code);
21920 return;
21923 regno = REGNO (x);
21924 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21926 output_operand_lossage ("invalid operand for code '%c'", code);
21927 return;
21930 if (GET_MODE_SIZE (mode) == 16)
21931 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21932 + (code == 'f' ? 1 : 0));
21933 else
21934 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21935 + (code == 'f' ? 1 : 0));
21937 return;
21939 /* Print a VFPv3 floating-point constant, represented as an integer
21940 index. */
21941 case 'G':
21943 int index = vfp3_const_double_index (x);
21944 gcc_assert (index != -1);
21945 fprintf (stream, "%d", index);
21947 return;
21949 /* Print bits representing opcode features for Neon.
21951 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21952 and polynomials as unsigned.
21954 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21956 Bit 2 is 1 for rounding functions, 0 otherwise. */
21958 /* Identify the type as 's', 'u', 'p' or 'f'. */
21959 case 'T':
21961 HOST_WIDE_INT bits = INTVAL (x);
21962 fputc ("uspf"[bits & 3], stream);
21964 return;
21966 /* Likewise, but signed and unsigned integers are both 'i'. */
21967 case 'F':
21969 HOST_WIDE_INT bits = INTVAL (x);
21970 fputc ("iipf"[bits & 3], stream);
21972 return;
21974 /* As for 'T', but emit 'u' instead of 'p'. */
21975 case 't':
21977 HOST_WIDE_INT bits = INTVAL (x);
21978 fputc ("usuf"[bits & 3], stream);
21980 return;
21982 /* Bit 2: rounding (vs none). */
21983 case 'O':
21985 HOST_WIDE_INT bits = INTVAL (x);
21986 fputs ((bits & 4) != 0 ? "r" : "", stream);
21988 return;
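      /* Editor's note -- illustrative values, not from the original source:
	 for an operand with INTVAL 1 (signed integer, no rounding) 'T' and
	 't' print 's', 'F' prints 'i' and 'O' prints nothing; for INTVAL 5
	 (signed integer with rounding) 'O' additionally prints 'r'; for
	 INTVAL 3 (float) 'T', 'F' and 't' all print 'f'.  */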
21990 /* Memory operand for vld1/vst1 instruction. */
21991 case 'A':
21993 rtx addr;
21994 bool postinc = FALSE;
21995 rtx postinc_reg = NULL;
21996 unsigned align, memsize, align_bits;
21998 gcc_assert (MEM_P (x));
21999 addr = XEXP (x, 0);
22000 if (GET_CODE (addr) == POST_INC)
22002 postinc = 1;
22003 addr = XEXP (addr, 0);
22005 if (GET_CODE (addr) == POST_MODIFY)
22007 postinc_reg = XEXP( XEXP (addr, 1), 1);
22008 addr = XEXP (addr, 0);
22010 asm_fprintf (stream, "[%r", REGNO (addr));
22012 /* We know the alignment of this access, so we can emit a hint in the
22013 instruction (for some alignments) as an aid to the memory subsystem
22014 of the target. */
22015 align = MEM_ALIGN (x) >> 3;
22016 memsize = MEM_SIZE (x);
22018 /* Only certain alignment specifiers are supported by the hardware. */
22019 if (memsize == 32 && (align % 32) == 0)
22020 align_bits = 256;
22021 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22022 align_bits = 128;
22023 else if (memsize >= 8 && (align % 8) == 0)
22024 align_bits = 64;
22025 else
22026 align_bits = 0;
22028 if (align_bits != 0)
22029 asm_fprintf (stream, ":%d", align_bits);
22031 asm_fprintf (stream, "]");
22033 if (postinc)
22034 fputs("!", stream);
22035 if (postinc_reg)
22036 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22038 return;
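      /* Editor's note -- an illustrative example, assuming the base register
	 is r4 and the post-modify register is r5: a 16-byte vld1/vst1 access
	 whose MEM_ALIGN is at least 128 bits prints as "[r4:128]"; with a
	 post-increment by the access size the output becomes "[r4:128]!",
	 and with a post-modify by a register it becomes "[r4:128], r5".  */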
22040 case 'C':
22042 rtx addr;
22044 gcc_assert (MEM_P (x));
22045 addr = XEXP (x, 0);
22046 gcc_assert (REG_P (addr));
22047 asm_fprintf (stream, "[%r]", REGNO (addr));
22049 return;
22051 /* Translate an S register number into a D register number and element index. */
22052 case 'y':
22054 machine_mode mode = GET_MODE (x);
22055 int regno;
22057 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22059 output_operand_lossage ("invalid operand for code '%c'", code);
22060 return;
22063 regno = REGNO (x);
22064 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22066 output_operand_lossage ("invalid operand for code '%c'", code);
22067 return;
22070 regno = regno - FIRST_VFP_REGNUM;
22071 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22073 return;
22075 case 'v':
22076 gcc_assert (CONST_DOUBLE_P (x));
22077 int result;
22078 result = vfp3_const_double_for_fract_bits (x);
22079 if (result == 0)
22080 result = vfp3_const_double_for_bits (x);
22081 fprintf (stream, "#%d", result);
22082 return;
22084 /* Register specifier for vld1.16/vst1.16. Translate the S register
22085 number into a D register number and element index. */
22086 case 'z':
22088 machine_mode mode = GET_MODE (x);
22089 int regno;
22091 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22093 output_operand_lossage ("invalid operand for code '%c'", code);
22094 return;
22097 regno = REGNO (x);
22098 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22100 output_operand_lossage ("invalid operand for code '%c'", code);
22101 return;
22104 regno = regno - FIRST_VFP_REGNUM;
22105 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22107 return;
22109 default:
22110 if (x == 0)
22112 output_operand_lossage ("missing operand");
22113 return;
22116 switch (GET_CODE (x))
22118 case REG:
22119 asm_fprintf (stream, "%r", REGNO (x));
22120 break;
22122 case MEM:
22123 output_memory_reference_mode = GET_MODE (x);
22124 output_address (XEXP (x, 0));
22125 break;
22127 case CONST_DOUBLE:
22129 char fpstr[20];
22130 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22131 sizeof (fpstr), 0, 1);
22132 fprintf (stream, "#%s", fpstr);
22134 break;
22136 default:
22137 gcc_assert (GET_CODE (x) != NEG);
22138 fputc ('#', stream);
22139 if (GET_CODE (x) == HIGH)
22141 fputs (":lower16:", stream);
22142 x = XEXP (x, 0);
22145 output_addr_const (stream, x);
22146 break;
22151 /* Target hook for printing a memory address. */
22152 static void
22153 arm_print_operand_address (FILE *stream, rtx x)
22155 if (TARGET_32BIT)
22157 int is_minus = GET_CODE (x) == MINUS;
22159 if (REG_P (x))
22160 asm_fprintf (stream, "[%r]", REGNO (x));
22161 else if (GET_CODE (x) == PLUS || is_minus)
22163 rtx base = XEXP (x, 0);
22164 rtx index = XEXP (x, 1);
22165 HOST_WIDE_INT offset = 0;
22166 if (!REG_P (base)
22167 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22169 /* Ensure that BASE is a register. */
22170 /* (one of them must be). */
22171 /* Also ensure the SP is not used as an index register. */
22172 std::swap (base, index);
22174 switch (GET_CODE (index))
22176 case CONST_INT:
22177 offset = INTVAL (index);
22178 if (is_minus)
22179 offset = -offset;
22180 asm_fprintf (stream, "[%r, #%wd]",
22181 REGNO (base), offset);
22182 break;
22184 case REG:
22185 asm_fprintf (stream, "[%r, %s%r]",
22186 REGNO (base), is_minus ? "-" : "",
22187 REGNO (index));
22188 break;
22190 case MULT:
22191 case ASHIFTRT:
22192 case LSHIFTRT:
22193 case ASHIFT:
22194 case ROTATERT:
22196 asm_fprintf (stream, "[%r, %s%r",
22197 REGNO (base), is_minus ? "-" : "",
22198 REGNO (XEXP (index, 0)));
22199 arm_print_operand (stream, index, 'S');
22200 fputs ("]", stream);
22201 break;
22204 default:
22205 gcc_unreachable ();
22208 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22209 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22211 extern machine_mode output_memory_reference_mode;
22213 gcc_assert (REG_P (XEXP (x, 0)));
22215 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22216 asm_fprintf (stream, "[%r, #%s%d]!",
22217 REGNO (XEXP (x, 0)),
22218 GET_CODE (x) == PRE_DEC ? "-" : "",
22219 GET_MODE_SIZE (output_memory_reference_mode));
22220 else
22221 asm_fprintf (stream, "[%r], #%s%d",
22222 REGNO (XEXP (x, 0)),
22223 GET_CODE (x) == POST_DEC ? "-" : "",
22224 GET_MODE_SIZE (output_memory_reference_mode));
22226 else if (GET_CODE (x) == PRE_MODIFY)
22228 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22229 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22230 asm_fprintf (stream, "#%wd]!",
22231 INTVAL (XEXP (XEXP (x, 1), 1)));
22232 else
22233 asm_fprintf (stream, "%r]!",
22234 REGNO (XEXP (XEXP (x, 1), 1)));
22236 else if (GET_CODE (x) == POST_MODIFY)
22238 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22239 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22240 asm_fprintf (stream, "#%wd",
22241 INTVAL (XEXP (XEXP (x, 1), 1)));
22242 else
22243 asm_fprintf (stream, "%r",
22244 REGNO (XEXP (XEXP (x, 1), 1)));
22246 else output_addr_const (stream, x);
22248 else
22250 if (REG_P (x))
22251 asm_fprintf (stream, "[%r]", REGNO (x));
22252 else if (GET_CODE (x) == POST_INC)
22253 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22254 else if (GET_CODE (x) == PLUS)
22256 gcc_assert (REG_P (XEXP (x, 0)));
22257 if (CONST_INT_P (XEXP (x, 1)))
22258 asm_fprintf (stream, "[%r, #%wd]",
22259 REGNO (XEXP (x, 0)),
22260 INTVAL (XEXP (x, 1)));
22261 else
22262 asm_fprintf (stream, "[%r, %r]",
22263 REGNO (XEXP (x, 0)),
22264 REGNO (XEXP (x, 1)));
22266 else
22267 output_addr_const (stream, x);
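/* Editor's note -- illustrative examples, assuming base r4 and index r5:
   in 32-bit state (reg r4) prints as "[r4]", (plus (reg r4) (const_int 8))
   as "[r4, #8]", (minus (reg r4) (reg r5)) as "[r4, -r5]",
   (post_inc (reg r4)) on an SImode access as "[r4], #4" and
   (pre_dec (reg r4)) as "[r4, #-4]!".  */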
22271 /* Target hook for indicating whether a punctuation character for
22272 TARGET_PRINT_OPERAND is valid. */
22273 static bool
22274 arm_print_operand_punct_valid_p (unsigned char code)
22276 return (code == '@' || code == '|' || code == '.'
22277 || code == '(' || code == ')' || code == '#'
22278 || (TARGET_32BIT && (code == '?'))
22279 || (TARGET_THUMB2 && (code == '!'))
22280 || (TARGET_THUMB && (code == '_')));
22283 /* Target hook for assembling integer objects. The ARM version needs to
22284 handle word-sized values specially. */
22285 static bool
22286 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22288 machine_mode mode;
22290 if (size == UNITS_PER_WORD && aligned_p)
22292 fputs ("\t.word\t", asm_out_file);
22293 output_addr_const (asm_out_file, x);
22295 /* Mark symbols as position independent. We only do this in the
22296 .text segment, not in the .data segment. */
22297 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22298 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22300 /* See legitimize_pic_address for an explanation of the
22301 TARGET_VXWORKS_RTP check. */
22302 if (!arm_pic_data_is_text_relative
22303 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22304 fputs ("(GOT)", asm_out_file);
22305 else
22306 fputs ("(GOTOFF)", asm_out_file);
22308 fputc ('\n', asm_out_file);
22309 return true;
22312 mode = GET_MODE (x);
22314 if (arm_vector_mode_supported_p (mode))
22316 int i, units;
22318 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22320 units = CONST_VECTOR_NUNITS (x);
22321 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22323 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22324 for (i = 0; i < units; i++)
22326 rtx elt = CONST_VECTOR_ELT (x, i);
22327 assemble_integer
22328 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22330 else
22331 for (i = 0; i < units; i++)
22333 rtx elt = CONST_VECTOR_ELT (x, i);
22334 REAL_VALUE_TYPE rval;
22336 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22338 assemble_real
22339 (rval, GET_MODE_INNER (mode),
22340 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22343 return true;
22346 return default_assemble_integer (x, size, aligned_p);
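/* Editor's note -- an illustrative example, not from the original source:
   with -fPIC on a target where NEED_GOT_RELOC holds, a word-sized
   constant-pool entry referring to a non-local symbol "foo" is emitted as
   "\t.word\tfoo(GOT)", while a text-relative local symbol is emitted as
   "\t.word\tfoo(GOTOFF)".  */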
22349 static void
22350 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22352 section *s;
22354 if (!TARGET_AAPCS_BASED)
22356 (is_ctor ?
22357 default_named_section_asm_out_constructor
22358 : default_named_section_asm_out_destructor) (symbol, priority);
22359 return;
22362 /* Put these in the .init_array section, using a special relocation. */
22363 if (priority != DEFAULT_INIT_PRIORITY)
22365 char buf[18];
22366 sprintf (buf, "%s.%.5u",
22367 is_ctor ? ".init_array" : ".fini_array",
22368 priority);
22369 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22371 else if (is_ctor)
22372 s = ctors_section;
22373 else
22374 s = dtors_section;
22376 switch_to_section (s);
22377 assemble_align (POINTER_SIZE);
22378 fputs ("\t.word\t", asm_out_file);
22379 output_addr_const (asm_out_file, symbol);
22380 fputs ("(target1)\n", asm_out_file);
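/* Editor's note -- an illustrative example: on an AAPCS-based target a
   constructor registered with priority 65 is placed in the section
   ".init_array.00065" and emitted as "\t.word\tsymbol(target1)", leaving
   the choice of final relocation to the linker via the target1
   annotation.  */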
22383 /* Add a function to the list of static constructors. */
22385 static void
22386 arm_elf_asm_constructor (rtx symbol, int priority)
22388 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22391 /* Add a function to the list of static destructors. */
22393 static void
22394 arm_elf_asm_destructor (rtx symbol, int priority)
22396 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22399 /* A finite state machine takes care of noticing whether or not instructions
22400 can be conditionally executed, and thus decrease execution time and code
22401 size by deleting branch instructions. The fsm is controlled by
22402 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22404 /* The states of the fsm controlling condition codes are:
22405 0: normal, do nothing special
22406 1: make ASM_OUTPUT_OPCODE not output this instruction
22407 2: make ASM_OUTPUT_OPCODE not output this instruction
22408 3: make instructions conditional
22409 4: make instructions conditional
22411 State transitions (state->state by whom under condition):
22412 0 -> 1 final_prescan_insn if the `target' is a label
22413 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22414 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22415 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22416 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22417 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22418 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22419 (the target insn is arm_target_insn).
22421 If the jump clobbers the conditions then we use states 2 and 4.
22423 A similar thing can be done with conditional return insns.
22425 XXX In case the `target' is an unconditional branch, this conditionalising
22426 of the instructions always reduces code size, but not always execution
22427 time. But then, I want to reduce the code size to somewhere near what
22428 /bin/cc produces. */
22430 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22431 instructions. When a COND_EXEC instruction is seen the subsequent
22432 instructions are scanned so that multiple conditional instructions can be
22433 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22434 specify the length and true/false mask for the IT block. These will be
22435 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
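/* Editor's note -- an illustrative example, not part of the original
   source.  Given

	cmp	r3, #0
	beq	.L2
	add	r0, r0, #1
   .L2:

   the fsm suppresses the beq (state 0 -> 1 -> 3) and the skipped insn is
   output as the conditional "addne r0, r0, #1" instead, removing the
   branch entirely.  */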
22437 /* Returns the index of the ARM condition code string in
22438 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22439 COMPARISON should be an rtx like `(eq (...) (...))'. */
22441 enum arm_cond_code
22442 maybe_get_arm_condition_code (rtx comparison)
22444 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22445 enum arm_cond_code code;
22446 enum rtx_code comp_code = GET_CODE (comparison);
22448 if (GET_MODE_CLASS (mode) != MODE_CC)
22449 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22450 XEXP (comparison, 1));
22452 switch (mode)
22454 case CC_DNEmode: code = ARM_NE; goto dominance;
22455 case CC_DEQmode: code = ARM_EQ; goto dominance;
22456 case CC_DGEmode: code = ARM_GE; goto dominance;
22457 case CC_DGTmode: code = ARM_GT; goto dominance;
22458 case CC_DLEmode: code = ARM_LE; goto dominance;
22459 case CC_DLTmode: code = ARM_LT; goto dominance;
22460 case CC_DGEUmode: code = ARM_CS; goto dominance;
22461 case CC_DGTUmode: code = ARM_HI; goto dominance;
22462 case CC_DLEUmode: code = ARM_LS; goto dominance;
22463 case CC_DLTUmode: code = ARM_CC;
22465 dominance:
22466 if (comp_code == EQ)
22467 return ARM_INVERSE_CONDITION_CODE (code);
22468 if (comp_code == NE)
22469 return code;
22470 return ARM_NV;
22472 case CC_NOOVmode:
22473 switch (comp_code)
22475 case NE: return ARM_NE;
22476 case EQ: return ARM_EQ;
22477 case GE: return ARM_PL;
22478 case LT: return ARM_MI;
22479 default: return ARM_NV;
22482 case CC_Zmode:
22483 switch (comp_code)
22485 case NE: return ARM_NE;
22486 case EQ: return ARM_EQ;
22487 default: return ARM_NV;
22490 case CC_Nmode:
22491 switch (comp_code)
22493 case NE: return ARM_MI;
22494 case EQ: return ARM_PL;
22495 default: return ARM_NV;
22498 case CCFPEmode:
22499 case CCFPmode:
22500 /* We can handle all cases except UNEQ and LTGT. */
22501 switch (comp_code)
22503 case GE: return ARM_GE;
22504 case GT: return ARM_GT;
22505 case LE: return ARM_LS;
22506 case LT: return ARM_MI;
22507 case NE: return ARM_NE;
22508 case EQ: return ARM_EQ;
22509 case ORDERED: return ARM_VC;
22510 case UNORDERED: return ARM_VS;
22511 case UNLT: return ARM_LT;
22512 case UNLE: return ARM_LE;
22513 case UNGT: return ARM_HI;
22514 case UNGE: return ARM_PL;
22515 /* UNEQ and LTGT do not have a representation. */
22516 case UNEQ: /* Fall through. */
22517 case LTGT: /* Fall through. */
22518 default: return ARM_NV;
22521 case CC_SWPmode:
22522 switch (comp_code)
22524 case NE: return ARM_NE;
22525 case EQ: return ARM_EQ;
22526 case GE: return ARM_LE;
22527 case GT: return ARM_LT;
22528 case LE: return ARM_GE;
22529 case LT: return ARM_GT;
22530 case GEU: return ARM_LS;
22531 case GTU: return ARM_CC;
22532 case LEU: return ARM_CS;
22533 case LTU: return ARM_HI;
22534 default: return ARM_NV;
22537 case CC_Cmode:
22538 switch (comp_code)
22540 case LTU: return ARM_CS;
22541 case GEU: return ARM_CC;
22542 default: return ARM_NV;
22545 case CC_CZmode:
22546 switch (comp_code)
22548 case NE: return ARM_NE;
22549 case EQ: return ARM_EQ;
22550 case GEU: return ARM_CS;
22551 case GTU: return ARM_HI;
22552 case LEU: return ARM_LS;
22553 case LTU: return ARM_CC;
22554 default: return ARM_NV;
22557 case CC_NCVmode:
22558 switch (comp_code)
22560 case GE: return ARM_GE;
22561 case LT: return ARM_LT;
22562 case GEU: return ARM_CS;
22563 case LTU: return ARM_CC;
22564 default: return ARM_NV;
22567 case CCmode:
22568 switch (comp_code)
22570 case NE: return ARM_NE;
22571 case EQ: return ARM_EQ;
22572 case GE: return ARM_GE;
22573 case GT: return ARM_GT;
22574 case LE: return ARM_LE;
22575 case LT: return ARM_LT;
22576 case GEU: return ARM_CS;
22577 case GTU: return ARM_HI;
22578 case LEU: return ARM_LS;
22579 case LTU: return ARM_CC;
22580 default: return ARM_NV;
22583 default: gcc_unreachable ();
22587 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22588 static enum arm_cond_code
22589 get_arm_condition_code (rtx comparison)
22591 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22592 gcc_assert (code != ARM_NV);
22593 return code;
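/* Editor's note -- an illustrative example: a (gt ...) comparison whose
   mode is CC_SWPmode maps to ARM_LT, because the original operands were
   swapped when the condition-code register was set, so the sense of the
   ordered comparisons must be reversed.  */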
22596 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22597 instructions. */
22598 void
22599 thumb2_final_prescan_insn (rtx_insn *insn)
22601 rtx_insn *first_insn = insn;
22602 rtx body = PATTERN (insn);
22603 rtx predicate;
22604 enum arm_cond_code code;
22605 int n;
22606 int mask;
22607 int max;
22609 /* max_insns_skipped in the tune was already taken into account in the
22610 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22611 just emit the IT blocks as they are. It does not make sense to split
22612 the IT blocks. */
22613 max = MAX_INSN_PER_IT_BLOCK;
22615 /* Remove the previous insn from the count of insns to be output. */
22616 if (arm_condexec_count)
22617 arm_condexec_count--;
22619 /* Nothing to do if we are already inside a conditional block. */
22620 if (arm_condexec_count)
22621 return;
22623 if (GET_CODE (body) != COND_EXEC)
22624 return;
22626 /* Conditional jumps are implemented directly. */
22627 if (JUMP_P (insn))
22628 return;
22630 predicate = COND_EXEC_TEST (body);
22631 arm_current_cc = get_arm_condition_code (predicate);
22633 n = get_attr_ce_count (insn);
22634 arm_condexec_count = 1;
22635 arm_condexec_mask = (1 << n) - 1;
22636 arm_condexec_masklen = n;
22637 /* See if subsequent instructions can be combined into the same block. */
22638 for (;;)
22640 insn = next_nonnote_insn (insn);
22642 /* Jumping into the middle of an IT block is illegal, so a label or
22643 barrier terminates the block. */
22644 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22645 break;
22647 body = PATTERN (insn);
22648 /* USE and CLOBBER aren't really insns, so just skip them. */
22649 if (GET_CODE (body) == USE
22650 || GET_CODE (body) == CLOBBER)
22651 continue;
22653 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22654 if (GET_CODE (body) != COND_EXEC)
22655 break;
22656 /* Maximum number of conditionally executed instructions in a block. */
22657 n = get_attr_ce_count (insn);
22658 if (arm_condexec_masklen + n > max)
22659 break;
22661 predicate = COND_EXEC_TEST (body);
22662 code = get_arm_condition_code (predicate);
22663 mask = (1 << n) - 1;
22664 if (arm_current_cc == code)
22665 arm_condexec_mask |= (mask << arm_condexec_masklen);
22666 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22667 break;
22669 arm_condexec_count++;
22670 arm_condexec_masklen += n;
22672 /* A jump must be the last instruction in a conditional block. */
22673 if (JUMP_P (insn))
22674 break;
22676 /* Restore recog_data (getting the attributes of other insns can
22677 destroy this array, but final.c assumes that it remains intact
22678 across this call). */
22679 extract_constrain_insn_cached (first_insn);
22682 void
22683 arm_final_prescan_insn (rtx_insn *insn)
22685 /* BODY will hold the body of INSN. */
22686 rtx body = PATTERN (insn);
22688 /* This will be 1 if trying to repeat the trick, and things need to be
22689 reversed if it appears to fail. */
22690 int reverse = 0;
22692 /* If we start with a return insn, we only succeed if we find another one. */
22693 int seeking_return = 0;
22694 enum rtx_code return_code = UNKNOWN;
22696 /* START_INSN will hold the insn from where we start looking. This is the
22697 first insn after the following code_label if REVERSE is true. */
22698 rtx_insn *start_insn = insn;
22700 /* If in state 4, check if the target branch is reached, in order to
22701 change back to state 0. */
22702 if (arm_ccfsm_state == 4)
22704 if (insn == arm_target_insn)
22706 arm_target_insn = NULL;
22707 arm_ccfsm_state = 0;
22709 return;
22712 /* If in state 3, it is possible to repeat the trick, if this insn is an
22713 unconditional branch to a label, and immediately following this branch
22714 is the previous target label which is only used once, and the label this
22715 branch jumps to is not too far off. */
22716 if (arm_ccfsm_state == 3)
22718 if (simplejump_p (insn))
22720 start_insn = next_nonnote_insn (start_insn);
22721 if (BARRIER_P (start_insn))
22723 /* XXX Isn't this always a barrier? */
22724 start_insn = next_nonnote_insn (start_insn);
22726 if (LABEL_P (start_insn)
22727 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22728 && LABEL_NUSES (start_insn) == 1)
22729 reverse = TRUE;
22730 else
22731 return;
22733 else if (ANY_RETURN_P (body))
22735 start_insn = next_nonnote_insn (start_insn);
22736 if (BARRIER_P (start_insn))
22737 start_insn = next_nonnote_insn (start_insn);
22738 if (LABEL_P (start_insn)
22739 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22740 && LABEL_NUSES (start_insn) == 1)
22742 reverse = TRUE;
22743 seeking_return = 1;
22744 return_code = GET_CODE (body);
22746 else
22747 return;
22749 else
22750 return;
22753 gcc_assert (!arm_ccfsm_state || reverse);
22754 if (!JUMP_P (insn))
22755 return;
22757 /* This jump might be paralleled with a clobber of the condition codes;
22758 the jump should always come first. */
22759 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22760 body = XVECEXP (body, 0, 0);
22762 if (reverse
22763 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22764 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22766 int insns_skipped;
22767 int fail = FALSE, succeed = FALSE;
22768 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22769 int then_not_else = TRUE;
22770 rtx_insn *this_insn = start_insn;
22771 rtx label = 0;
22773 /* Register the insn jumped to. */
22774 if (reverse)
22776 if (!seeking_return)
22777 label = XEXP (SET_SRC (body), 0);
22779 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22780 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22781 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22783 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22784 then_not_else = FALSE;
22786 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22788 seeking_return = 1;
22789 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22791 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22793 seeking_return = 1;
22794 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22795 then_not_else = FALSE;
22797 else
22798 gcc_unreachable ();
22800 /* See how many insns this branch skips, and what kind of insns. If all
22801 insns are okay, and the label or unconditional branch to the same
22802 label is not too far away, succeed. */
22803 for (insns_skipped = 0;
22804 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22806 rtx scanbody;
22808 this_insn = next_nonnote_insn (this_insn);
22809 if (!this_insn)
22810 break;
22812 switch (GET_CODE (this_insn))
22814 case CODE_LABEL:
22815 /* Succeed if it is the target label, otherwise fail since
22816 control falls in from somewhere else. */
22817 if (this_insn == label)
22819 arm_ccfsm_state = 1;
22820 succeed = TRUE;
22822 else
22823 fail = TRUE;
22824 break;
22826 case BARRIER:
22827 /* Succeed if the following insn is the target label.
22828 Otherwise fail.
22829 If return insns are used then the last insn in a function
22830 will be a barrier. */
22831 this_insn = next_nonnote_insn (this_insn);
22832 if (this_insn && this_insn == label)
22834 arm_ccfsm_state = 1;
22835 succeed = TRUE;
22837 else
22838 fail = TRUE;
22839 break;
22841 case CALL_INSN:
22842 /* The AAPCS says that conditional calls should not be
22843 used since they make interworking inefficient (the
22844 linker can't transform BL<cond> into BLX). That's
22845 only a problem if the machine has BLX. */
22846 if (arm_arch5)
22848 fail = TRUE;
22849 break;
22852 /* Succeed if the following insn is the target label, or
22853 if the following two insns are a barrier and the
22854 target label. */
22855 this_insn = next_nonnote_insn (this_insn);
22856 if (this_insn && BARRIER_P (this_insn))
22857 this_insn = next_nonnote_insn (this_insn);
22859 if (this_insn && this_insn == label
22860 && insns_skipped < max_insns_skipped)
22862 arm_ccfsm_state = 1;
22863 succeed = TRUE;
22865 else
22866 fail = TRUE;
22867 break;
22869 case JUMP_INSN:
22870 /* If this is an unconditional branch to the same label, succeed.
22871 If it is to another label, do nothing. If it is conditional,
22872 fail. */
22873 /* XXX Probably, the tests for SET and the PC are
22874 unnecessary. */
22876 scanbody = PATTERN (this_insn);
22877 if (GET_CODE (scanbody) == SET
22878 && GET_CODE (SET_DEST (scanbody)) == PC)
22880 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22881 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22883 arm_ccfsm_state = 2;
22884 succeed = TRUE;
22886 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22887 fail = TRUE;
22889 /* Fail if a conditional return is undesirable (e.g. on a
22890 StrongARM), but still allow this if optimizing for size. */
22891 else if (GET_CODE (scanbody) == return_code
22892 && !use_return_insn (TRUE, NULL)
22893 && !optimize_size)
22894 fail = TRUE;
22895 else if (GET_CODE (scanbody) == return_code)
22897 arm_ccfsm_state = 2;
22898 succeed = TRUE;
22900 else if (GET_CODE (scanbody) == PARALLEL)
22902 switch (get_attr_conds (this_insn))
22904 case CONDS_NOCOND:
22905 break;
22906 default:
22907 fail = TRUE;
22908 break;
22911 else
22912 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22914 break;
22916 case INSN:
22917 /* Instructions using or affecting the condition codes make it
22918 fail. */
22919 scanbody = PATTERN (this_insn);
22920 if (!(GET_CODE (scanbody) == SET
22921 || GET_CODE (scanbody) == PARALLEL)
22922 || get_attr_conds (this_insn) != CONDS_NOCOND)
22923 fail = TRUE;
22924 break;
22926 default:
22927 break;
22930 if (succeed)
22932 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22933 arm_target_label = CODE_LABEL_NUMBER (label);
22934 else
22936 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22938 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22940 this_insn = next_nonnote_insn (this_insn);
22941 gcc_assert (!this_insn
22942 || (!BARRIER_P (this_insn)
22943 && !LABEL_P (this_insn)));
22945 if (!this_insn)
22947 /* Oh, dear! We ran off the end... give up. */
22948 extract_constrain_insn_cached (insn);
22949 arm_ccfsm_state = 0;
22950 arm_target_insn = NULL;
22951 return;
22953 arm_target_insn = this_insn;
22956 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22957 what it was. */
22958 if (!reverse)
22959 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22961 if (reverse || then_not_else)
22962 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22965 /* Restore recog_data (getting the attributes of other insns can
22966 destroy this array, but final.c assumes that it remains intact
22967 across this call). */
22968 extract_constrain_insn_cached (insn);
22972 /* Output IT instructions. */
22973 void
22974 thumb2_asm_output_opcode (FILE * stream)
22976 char buff[5];
22977 int n;
22979 if (arm_condexec_mask)
22981 for (n = 0; n < arm_condexec_masklen; n++)
22982 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22983 buff[n] = 0;
22984 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22985 arm_condition_codes[arm_current_cc]);
22986 arm_condexec_mask = 0;
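/* Editor's note -- an illustrative example: with arm_current_cc == ARM_EQ,
   arm_condexec_masklen == 3 and arm_condexec_mask == 0x5 (binary 101),
   buff becomes "tet" and the function emits "itet\teq", so the three
   following insns are printed with the conditions eq, ne and eq
   respectively.  */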
22990 /* Returns true if REGNO is a valid register
22991 for holding a quantity of type MODE. */
22993 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22995 if (GET_MODE_CLASS (mode) == MODE_CC)
22996 return (regno == CC_REGNUM
22997 || (TARGET_HARD_FLOAT && TARGET_VFP
22998 && regno == VFPCC_REGNUM));
23000 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23001 return false;
23003 if (TARGET_THUMB1)
23004 /* For the Thumb we only allow values bigger than SImode in
23005 registers 0 - 6, so that there is always a second low
23006 register available to hold the upper part of the value.
23007 We probably ought to ensure that the register is the
23008 start of an even numbered register pair. */
23009 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23011 if (TARGET_HARD_FLOAT && TARGET_VFP
23012 && IS_VFP_REGNUM (regno))
23014 if (mode == SFmode || mode == SImode)
23015 return VFP_REGNO_OK_FOR_SINGLE (regno);
23017 if (mode == DFmode)
23018 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23020 /* VFP registers can hold HFmode values, but there is no point in
23021 putting them there unless we have hardware conversion insns. */
23022 if (mode == HFmode)
23023 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23025 if (TARGET_NEON)
23026 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23027 || (VALID_NEON_QREG_MODE (mode)
23028 && NEON_REGNO_OK_FOR_QUAD (regno))
23029 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23030 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23031 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23032 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23033 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23035 return FALSE;
23038 if (TARGET_REALLY_IWMMXT)
23040 if (IS_IWMMXT_GR_REGNUM (regno))
23041 return mode == SImode;
23043 if (IS_IWMMXT_REGNUM (regno))
23044 return VALID_IWMMXT_REG_MODE (mode);
23047 /* We allow almost any value to be stored in the general registers.
23048 Restrict doubleword quantities to even register pairs in ARM state
23049 so that we can use ldrd. Do not allow very large Neon structure
23050 opaque modes in general registers; they would use too many. */
23051 if (regno <= LAST_ARM_REGNUM)
23053 if (ARM_NUM_REGS (mode) > 4)
23054 return FALSE;
23056 if (TARGET_THUMB2)
23057 return TRUE;
23059 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23062 if (regno == FRAME_POINTER_REGNUM
23063 || regno == ARG_POINTER_REGNUM)
23064 /* We only allow integers in the fake hard registers. */
23065 return GET_MODE_CLASS (mode) == MODE_INT;
23067 return FALSE;
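/* Editor's note -- illustrative examples: in ARM state with TARGET_LDRD a
   DImode value may live in the even/odd pair r0/r1 but not start at the
   odd register r1; in Thumb-2 state either starting register is accepted;
   in Thumb-1 state multi-word values are restricted to r0-r6 so that a
   second low register is always available for the upper part.  */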
23070 /* Implement MODES_TIEABLE_P. */
23072 bool
23073 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23075 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23076 return true;
23078 /* We specifically want to allow elements of "structure" modes to
23079 be tieable to the structure. This more general condition allows
23080 other rarer situations too. */
23081 if (TARGET_NEON
23082 && (VALID_NEON_DREG_MODE (mode1)
23083 || VALID_NEON_QREG_MODE (mode1)
23084 || VALID_NEON_STRUCT_MODE (mode1))
23085 && (VALID_NEON_DREG_MODE (mode2)
23086 || VALID_NEON_QREG_MODE (mode2)
23087 || VALID_NEON_STRUCT_MODE (mode2)))
23088 return true;
23090 return false;
23093 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23094 not used in arm mode. */
23096 enum reg_class
23097 arm_regno_class (int regno)
23099 if (regno == PC_REGNUM)
23100 return NO_REGS;
23102 if (TARGET_THUMB1)
23104 if (regno == STACK_POINTER_REGNUM)
23105 return STACK_REG;
23106 if (regno == CC_REGNUM)
23107 return CC_REG;
23108 if (regno < 8)
23109 return LO_REGS;
23110 return HI_REGS;
23113 if (TARGET_THUMB2 && regno < 8)
23114 return LO_REGS;
23116 if ( regno <= LAST_ARM_REGNUM
23117 || regno == FRAME_POINTER_REGNUM
23118 || regno == ARG_POINTER_REGNUM)
23119 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23121 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23122 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23124 if (IS_VFP_REGNUM (regno))
23126 if (regno <= D7_VFP_REGNUM)
23127 return VFP_D0_D7_REGS;
23128 else if (regno <= LAST_LO_VFP_REGNUM)
23129 return VFP_LO_REGS;
23130 else
23131 return VFP_HI_REGS;
23134 if (IS_IWMMXT_REGNUM (regno))
23135 return IWMMXT_REGS;
23137 if (IS_IWMMXT_GR_REGNUM (regno))
23138 return IWMMXT_GR_REGS;
23140 return NO_REGS;
23143 /* Handle a special case when computing the offset
23144 of an argument from the frame pointer. */
23146 arm_debugger_arg_offset (int value, rtx addr)
23148 rtx_insn *insn;
23150 /* We are only interested if dbxout_parms() failed to compute the offset. */
23151 if (value != 0)
23152 return 0;
23154 /* We can only cope with the case where the address is held in a register. */
23155 if (!REG_P (addr))
23156 return 0;
23158 /* If we are using the frame pointer to point at the argument, then
23159 an offset of 0 is correct. */
23160 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23161 return 0;
23163 /* If we are using the stack pointer to point at the
23164 argument, then an offset of 0 is correct. */
23165 /* ??? Check this is consistent with thumb2 frame layout. */
23166 if ((TARGET_THUMB || !frame_pointer_needed)
23167 && REGNO (addr) == SP_REGNUM)
23168 return 0;
23170 /* Oh dear. The argument is pointed to by a register rather
23171 than being held in a register, or being stored at a known
23172 offset from the frame pointer. Since GDB only understands
23173 those two kinds of argument we must translate the address
23174 held in the register into an offset from the frame pointer.
23175 We do this by searching through the insns for the function
23176 looking to see where this register gets its value. If the
23177 register is initialized from the frame pointer plus an offset
23178 then we are in luck and we can continue, otherwise we give up.
23180 This code is exercised by producing debugging information
23181 for a function with arguments like this:
23183 double func (double a, double b, int c, double d) {return d;}
23185 Without this code the stab for parameter 'd' will be set to
23186 an offset of 0 from the frame pointer, rather than 8. */
23188 /* The if() statement says:
23190 If the insn is a normal instruction
23191 and if the insn is setting the value in a register
23192 and if the register being set is the register holding the address of the argument
23193 and if the address is computed by an addition
23194 that involves adding to a register
23195 which is the frame pointer
23196 a constant integer
23198 then... */
23200 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23202 if ( NONJUMP_INSN_P (insn)
23203 && GET_CODE (PATTERN (insn)) == SET
23204 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23205 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23206 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23207 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23208 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23211 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23213 break;
23217 if (value == 0)
23219 debug_rtx (addr);
23220 warning (0, "unable to compute real location of stacked parameter");
23221 value = 8; /* XXX magic hack */
23224 return value;
23227 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23229 static const char *
23230 arm_invalid_parameter_type (const_tree t)
23232 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23233 return N_("function parameters cannot have __fp16 type");
23234 return NULL;
23237 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23239 static const char *
23240 arm_invalid_return_type (const_tree t)
23242 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23243 return N_("functions cannot return __fp16 type");
23244 return NULL;
23247 /* Implement TARGET_PROMOTED_TYPE. */
23249 static tree
23250 arm_promoted_type (const_tree t)
23252 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23253 return float_type_node;
23254 return NULL_TREE;
23257 /* Implement TARGET_CONVERT_TO_TYPE.
23258 Specifically, this hook implements the peculiarity of the ARM
23259 half-precision floating-point C semantics that requires conversions between
23260 __fp16 to or from double to do an intermediate conversion to float. */
23262 static tree
23263 arm_convert_to_type (tree type, tree expr)
23265 tree fromtype = TREE_TYPE (expr);
23266 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23267 return NULL_TREE;
23268 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23269 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23270 return convert (type, convert (float_type_node, expr));
23271 return NULL_TREE;
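/* Editor's note -- an illustrative example: converting an __fp16 value x to
   double is rewritten as (double) (float) x, and converting a double d to
   __fp16 as (__fp16) (float) d, so only the single-step __fp16 <-> float
   conversions need to exist.  */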
23274 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23275 This simply adds HFmode as a supported mode; even though we don't
23276 implement arithmetic on this type directly, it's supported by
23277 optabs conversions, much the way the double-word arithmetic is
23278 special-cased in the default hook. */
23280 static bool
23281 arm_scalar_mode_supported_p (machine_mode mode)
23283 if (mode == HFmode)
23284 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23285 else if (ALL_FIXED_POINT_MODE_P (mode))
23286 return true;
23287 else
23288 return default_scalar_mode_supported_p (mode);
23291 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23292 void
23293 neon_reinterpret (rtx dest, rtx src)
23295 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23298 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23299 not to early-clobber SRC registers in the process.
23301 We assume that the operands described by SRC and DEST represent a
23302 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23303 number of components into which the copy has been decomposed. */
23304 void
23305 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23307 unsigned int i;
23309 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23310 || REGNO (operands[0]) < REGNO (operands[1]))
23312 for (i = 0; i < count; i++)
23314 operands[2 * i] = dest[i];
23315 operands[2 * i + 1] = src[i];
23318 else
23320 for (i = 0; i < count; i++)
23322 operands[2 * i] = dest[count - i - 1];
23323 operands[2 * i + 1] = src[count - i - 1];
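/* Editor's note -- an illustrative example: copying a two-component
   structure from {d1, d2} into {d2, d3} overlaps and the destination
   starts at a higher register, so the components are emitted in reverse
   order -- d3 <- d2 first, then d2 <- d1 -- ensuring d2 is read before it
   is overwritten.  */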
23328 /* Split operands into moves from op[1] + op[2] into op[0]. */
23330 void
23331 neon_split_vcombine (rtx operands[3])
23333 unsigned int dest = REGNO (operands[0]);
23334 unsigned int src1 = REGNO (operands[1]);
23335 unsigned int src2 = REGNO (operands[2]);
23336 machine_mode halfmode = GET_MODE (operands[1]);
23337 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23338 rtx destlo, desthi;
23340 if (src1 == dest && src2 == dest + halfregs)
23342 /* No-op move. Can't split to nothing; emit something. */
23343 emit_note (NOTE_INSN_DELETED);
23344 return;
23347 /* Preserve register attributes for variable tracking. */
23348 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23349 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23350 GET_MODE_SIZE (halfmode));
23352 /* Special case of reversed high/low parts. Use VSWP. */
23353 if (src2 == dest && src1 == dest + halfregs)
23355 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
23356 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
23357 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23358 return;
23361 if (!reg_overlap_mentioned_p (operands[2], destlo))
23363 /* Try to avoid unnecessary moves if part of the result
23364 is in the right place already. */
23365 if (src1 != dest)
23366 emit_move_insn (destlo, operands[1]);
23367 if (src2 != dest + halfregs)
23368 emit_move_insn (desthi, operands[2]);
23370 else
23372 if (src2 != dest + halfregs)
23373 emit_move_insn (desthi, operands[2]);
23374 if (src1 != dest)
23375 emit_move_insn (destlo, operands[1]);
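/* Editor's note -- an illustrative example: for q1 = vcombine (d3, d2) the
   halves of the destination are exactly the sources in reversed order, so
   a single parallel of the two sets is emitted for the VSWP case; for
   q1 = vcombine (d2, d3) the copy is already in place and only a
   deleted-insn note is emitted.  */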
23379 /* Return the number (counting from 0) of
23380 the least significant set bit in MASK. */
23382 inline static int
23383 number_of_first_bit_set (unsigned mask)
23385 return ctz_hwi (mask);
23388 /* Like emit_multi_reg_push, but allowing for a different set of
23389 registers to be described as saved. MASK is the set of registers
23390 to be saved; REAL_REGS is the set of registers to be described as
23391 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23393 static rtx_insn *
23394 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23396 unsigned long regno;
23397 rtx par[10], tmp, reg;
23398 rtx_insn *insn;
23399 int i, j;
23401 /* Build the parallel of the registers actually being stored. */
23402 for (i = 0; mask; ++i, mask &= mask - 1)
23404 regno = ctz_hwi (mask);
23405 reg = gen_rtx_REG (SImode, regno);
23407 if (i == 0)
23408 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23409 else
23410 tmp = gen_rtx_USE (VOIDmode, reg);
23412 par[i] = tmp;
23415 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23416 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23417 tmp = gen_frame_mem (BLKmode, tmp);
23418 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23419 par[0] = tmp;
23421 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23422 insn = emit_insn (tmp);
23424 /* Always build the stack adjustment note for unwind info. */
23425 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23426 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23427 par[0] = tmp;
23429 /* Build the parallel of the registers recorded as saved for unwind. */
23430 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23432 regno = ctz_hwi (real_regs);
23433 reg = gen_rtx_REG (SImode, regno);
23435 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23436 tmp = gen_frame_mem (SImode, tmp);
23437 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23438 RTX_FRAME_RELATED_P (tmp) = 1;
23439 par[j + 1] = tmp;
23442 if (j == 0)
23443 tmp = par[0];
23444 else
23446 RTX_FRAME_RELATED_P (par[0]) = 1;
23447 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23450 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23452 return insn;
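/* Editor's note -- an illustrative example: for MASK == REAL_REGS covering
   r4, r5 and lr, a single "push {r4, r5, lr}" insn is emitted, and the
   attached REG_FRAME_RELATED_EXPR note describes sp = sp - 12 together
   with the stores of r4, r5 and lr at sp, sp + 4 and sp + 8 for the
   unwinder.  */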
23455 /* Emit code to push or pop registers to or from the stack. F is the
23456 assembly file. MASK is the registers to pop. */
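/* Editor's illustration (not part of the original source): with
   MASK = (1 << 4) | (1 << 5) | (1 << PC_REGNUM) this routine emits
	pop	{r4, r5, pc}
   unless interworking, backtracing or __builtin_eh_return forces the
   PC to be popped via thumb_exit instead.  */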
23457 static void
23458 thumb_pop (FILE *f, unsigned long mask)
23460 int regno;
23461 int lo_mask = mask & 0xFF;
23462 int pushed_words = 0;
23464 gcc_assert (mask);
23466 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23468 /* Special case. Do not generate a POP PC statement here, do it in
23469 thumb_exit (). */
23470 thumb_exit (f, -1);
23471 return;
23474 fprintf (f, "\tpop\t{");
23476 /* Look at the low registers first. */
23477 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23479 if (lo_mask & 1)
23481 asm_fprintf (f, "%r", regno);
23483 if ((lo_mask & ~1) != 0)
23484 fprintf (f, ", ");
23486 pushed_words++;
23490 if (mask & (1 << PC_REGNUM))
23492 /* Catch popping the PC. */
23493 if (TARGET_INTERWORK || TARGET_BACKTRACE
23494 || crtl->calls_eh_return)
23496 /* The PC is never popped directly; instead
23497 it is popped into r3 and then BX is used. */
23498 fprintf (f, "}\n");
23500 thumb_exit (f, -1);
23502 return;
23504 else
23506 if (mask & 0xFF)
23507 fprintf (f, ", ");
23509 asm_fprintf (f, "%r", PC_REGNUM);
23513 fprintf (f, "}\n");
23516 /* Generate code to return from a thumb function.
23517 If 'reg_containing_return_addr' is -1, then the return address is
23518 actually on the stack, at the stack pointer. */
23519 static void
23520 thumb_exit (FILE *f, int reg_containing_return_addr)
23522 unsigned regs_available_for_popping;
23523 unsigned regs_to_pop;
23524 int pops_needed;
23525 unsigned available;
23526 unsigned required;
23527 machine_mode mode;
23528 int size;
23529 int restore_a4 = FALSE;
23531 /* Compute the registers we need to pop. */
23532 regs_to_pop = 0;
23533 pops_needed = 0;
23535 if (reg_containing_return_addr == -1)
23537 regs_to_pop |= 1 << LR_REGNUM;
23538 ++pops_needed;
23541 if (TARGET_BACKTRACE)
23543 /* Restore the (ARM) frame pointer and stack pointer. */
23544 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23545 pops_needed += 2;
23548 /* If there is nothing to pop then just emit the BX instruction and
23549 return. */
23550 if (pops_needed == 0)
23552 if (crtl->calls_eh_return)
23553 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23555 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23556 return;
23558 /* Otherwise if we are not supporting interworking and we have not created
23559 a backtrace structure and the function was not entered in ARM mode then
23560 just pop the return address straight into the PC. */
23561 else if (!TARGET_INTERWORK
23562 && !TARGET_BACKTRACE
23563 && !is_called_in_ARM_mode (current_function_decl)
23564 && !crtl->calls_eh_return)
23566 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23567 return;
23570 /* Find out how many of the (return) argument registers we can corrupt. */
23571 regs_available_for_popping = 0;
23573 /* If returning via __builtin_eh_return, the bottom three registers
23574 all contain information needed for the return. */
23575 if (crtl->calls_eh_return)
23576 size = 12;
23577 else
23579 /* Deduce the registers used from the function's
23580 return value. This is more reliable than examining
23581 df_regs_ever_live_p () because that will be set if the register is
23582 ever used in the function, not just if the register is used
23583 to hold a return value. */
23585 if (crtl->return_rtx != 0)
23586 mode = GET_MODE (crtl->return_rtx);
23587 else
23588 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23590 size = GET_MODE_SIZE (mode);
23592 if (size == 0)
23594 /* In a void function we can use any argument register.
23595 In a function that returns a structure on the stack
23596 we can use the second and third argument registers. */
23597 if (mode == VOIDmode)
23598 regs_available_for_popping =
23599 (1 << ARG_REGISTER (1))
23600 | (1 << ARG_REGISTER (2))
23601 | (1 << ARG_REGISTER (3));
23602 else
23603 regs_available_for_popping =
23604 (1 << ARG_REGISTER (2))
23605 | (1 << ARG_REGISTER (3));
23607 else if (size <= 4)
23608 regs_available_for_popping =
23609 (1 << ARG_REGISTER (2))
23610 | (1 << ARG_REGISTER (3));
23611 else if (size <= 8)
23612 regs_available_for_popping =
23613 (1 << ARG_REGISTER (3));
23616 /* Match registers to be popped with registers into which we pop them. */
23617 for (available = regs_available_for_popping,
23618 required = regs_to_pop;
23619 required != 0 && available != 0;
23620 available &= ~(available & - available),
23621 required &= ~(required & - required))
23622 -- pops_needed;
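/* Added note (illustration, not in the original): each iteration above
   clears the lowest set bit of both masks (x &= ~(x & -x)), pairing one
   register to pop with one register to receive it, so POPS_NEEDED ends
   up as the number of required pops that could not be matched.  */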
23624 /* If we have any popping registers left over, remove them. */
23625 if (available > 0)
23626 regs_available_for_popping &= ~available;
23628 /* Otherwise if we need another popping register we can use
23629 the fourth argument register. */
23630 else if (pops_needed)
23632 /* If we have not found any free argument registers and
23633 reg a4 contains the return address, we must move it. */
23634 if (regs_available_for_popping == 0
23635 && reg_containing_return_addr == LAST_ARG_REGNUM)
23637 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23638 reg_containing_return_addr = LR_REGNUM;
23640 else if (size > 12)
23642 /* Register a4 is being used to hold part of the return value,
23643 but we have dire need of a free, low register. */
23644 restore_a4 = TRUE;
23646 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
23649 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23651 /* The fourth argument register is available. */
23652 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23654 --pops_needed;
23658 /* Pop as many registers as we can. */
23659 thumb_pop (f, regs_available_for_popping);
23661 /* Process the registers we popped. */
23662 if (reg_containing_return_addr == -1)
23664 /* The return address was popped into the lowest numbered register. */
23665 regs_to_pop &= ~(1 << LR_REGNUM);
23667 reg_containing_return_addr =
23668 number_of_first_bit_set (regs_available_for_popping);
23670 /* Remove this register from the mask of available registers, so that
23671 the return address will not be corrupted by further pops. */
23672 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23675 /* If we popped other registers then handle them here. */
23676 if (regs_available_for_popping)
23678 int frame_pointer;
23680 /* Work out which register currently contains the frame pointer. */
23681 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23683 /* Move it into the correct place. */
23684 asm_fprintf (f, "\tmov\t%r, %r\n",
23685 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23687 /* (Temporarily) remove it from the mask of popped registers. */
23688 regs_available_for_popping &= ~(1 << frame_pointer);
23689 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23691 if (regs_available_for_popping)
23693 int stack_pointer;
23695 /* We popped the stack pointer as well,
23696 find the register that contains it. */
23697 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23699 /* Move it into the stack register. */
23700 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23702 /* At this point we have popped all necessary registers, so
23703 do not worry about restoring regs_available_for_popping
23704 to its correct value:
23706 assert (pops_needed == 0)
23707 assert (regs_available_for_popping == (1 << frame_pointer))
23708 assert (regs_to_pop == (1 << STACK_POINTER)) */
23710 else
23712 /* Since we have just moved the popped value into the frame
23713 pointer, the popping register is available for reuse, and
23714 we know that we still have the stack pointer left to pop. */
23715 regs_available_for_popping |= (1 << frame_pointer);
23719 /* If we still have registers left on the stack, but we no longer have
23720 any registers into which we can pop them, then we must move the return
23721 address into the link register and make available the register that
23722 contained it. */
23723 if (regs_available_for_popping == 0 && pops_needed > 0)
23725 regs_available_for_popping |= 1 << reg_containing_return_addr;
23727 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23728 reg_containing_return_addr);
23730 reg_containing_return_addr = LR_REGNUM;
23733 /* If we have registers left on the stack then pop some more.
23734 We know that at most we will want to pop FP and SP. */
23735 if (pops_needed > 0)
23737 int popped_into;
23738 int move_to;
23740 thumb_pop (f, regs_available_for_popping);
23742 /* We have popped either FP or SP.
23743 Move whichever one it is into the correct register. */
23744 popped_into = number_of_first_bit_set (regs_available_for_popping);
23745 move_to = number_of_first_bit_set (regs_to_pop);
23747 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23749 regs_to_pop &= ~(1 << move_to);
23751 --pops_needed;
23754 /* If we still have not popped everything then we must have only
23755 had one register available to us and we are now popping the SP. */
23756 if (pops_needed > 0)
23758 int popped_into;
23760 thumb_pop (f, regs_available_for_popping);
23762 popped_into = number_of_first_bit_set (regs_available_for_popping);
23764 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23766 /* assert (regs_to_pop == (1 << STACK_POINTER))
23767 assert (pops_needed == 1) */
23771 /* If necessary restore the a4 register. */
23772 if (restore_a4)
23774 if (reg_containing_return_addr != LR_REGNUM)
23776 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23777 reg_containing_return_addr = LR_REGNUM;
23780 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23783 if (crtl->calls_eh_return)
23784 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23786 /* Return to caller. */
23787 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23790 /* Scan INSN just before assembler is output for it.
23791 For Thumb-1, we track the status of the condition codes; this
23792 information is used in the cbranchsi4_insn pattern. */
23793 void
23794 thumb1_final_prescan_insn (rtx_insn *insn)
23796 if (flag_print_asm_name)
23797 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23798 INSN_ADDRESSES (INSN_UID (insn)));
23799 /* Don't overwrite the previous setter when we get to a cbranch. */
23800 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23802 enum attr_conds conds;
23804 if (cfun->machine->thumb1_cc_insn)
23806 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23807 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23808 CC_STATUS_INIT;
23810 conds = get_attr_conds (insn);
23811 if (conds == CONDS_SET)
23813 rtx set = single_set (insn);
23814 cfun->machine->thumb1_cc_insn = insn;
23815 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23816 cfun->machine->thumb1_cc_op1 = const0_rtx;
23817 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23818 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23820 rtx src1 = XEXP (SET_SRC (set), 1);
23821 if (src1 == const0_rtx)
23822 cfun->machine->thumb1_cc_mode = CCmode;
23824 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23826 /* Record the src register operand instead of dest because
23827 cprop_hardreg pass propagates src. */
23828 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23831 else if (conds != CONDS_NOCOND)
23832 cfun->machine->thumb1_cc_insn = NULL_RTX;
23835 /* Check if unexpected far jump is used. */
23836 if (cfun->machine->lr_save_eliminated
23837 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23838 internal_error ("Unexpected thumb1 far jump");
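/* Editor's note (illustration, not in the original source):
   thumb_shiftable_const returns nonzero when VAL is an 8-bit constant
   shifted left by 0..24 bits, e.g. 0x00ff0000 (0xff << 16) qualifies,
   whereas 0x00ff00ff does not.  */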
23842 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23844 unsigned HOST_WIDE_INT mask = 0xff;
23845 int i;
23847 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23848 if (val == 0) /* XXX */
23849 return 0;
23851 for (i = 0; i < 25; i++)
23852 if ((val & (mask << i)) == val)
23853 return 1;
23855 return 0;
23858 /* Returns nonzero if the current function contains,
23859 or might contain a far jump. */
23860 static int
23861 thumb_far_jump_used_p (void)
23863 rtx_insn *insn;
23864 bool far_jump = false;
23865 unsigned int func_size = 0;
23867 /* This test is only important for leaf functions. */
23868 /* assert (!leaf_function_p ()); */
23870 /* If we have already decided that far jumps may be used,
23871 do not bother checking again, and always return true even if
23872 it turns out that they are not being used. Once we have made
23873 the decision that far jumps are present (and that hence the link
23874 register will be pushed onto the stack) we cannot go back on it. */
23875 if (cfun->machine->far_jump_used)
23876 return 1;
23878 /* If this function is not being called from the prologue/epilogue
23879 generation code then it must be being called from the
23880 INITIAL_ELIMINATION_OFFSET macro. */
23881 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23883 /* In this case we know that we are being asked about the elimination
23884 of the arg pointer register. If that register is not being used,
23885 then there are no arguments on the stack, and we do not have to
23886 worry that a far jump might force the prologue to push the link
23887 register, changing the stack offsets. In this case we can just
23888 return false, since the presence of far jumps in the function will
23889 not affect stack offsets.
23891 If the arg pointer is live (or if it was live, but has now been
23892 eliminated and so set to dead) then we do have to test to see if
23893 the function might contain a far jump. This test can lead to some
23894 false negatives, since before reload is completed, the length of
23895 branch instructions is not known, so gcc defaults to returning their
23896 longest length, which in turn sets the far jump attribute to true.
23898 A false negative will not result in bad code being generated, but it
23899 will result in a needless push and pop of the link register. We
23900 hope that this does not occur too often.
23902 If we need doubleword stack alignment this could affect the other
23903 elimination offsets so we can't risk getting it wrong. */
23904 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23905 cfun->machine->arg_pointer_live = 1;
23906 else if (!cfun->machine->arg_pointer_live)
23907 return 0;
23910 /* We should not change far_jump_used during or after reload, as there is
23911 no chance to change stack frame layout. */
23912 if (reload_in_progress || reload_completed)
23913 return 0;
23915 /* Check to see if the function contains a branch
23916 insn with the far jump attribute set. */
23917 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23919 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23921 far_jump = true;
23923 func_size += get_attr_length (insn);
23926 /* The far_jump attribute will always be true for thumb1 before the
23927 shorten_branch pass, so checking the far_jump attribute before
23928 shorten_branch is not very useful.
23930 The following heuristic tries to estimate more accurately whether a far
23931 jump may finally be used. The heuristic is very conservative, as there
23932 is no chance to roll back a decision not to use far jumps.
23934 Thumb1 long branch offset is -2048 to 2046. The worst case is that each
23935 2-byte insn is associated with a 4-byte constant pool entry. Using
23936 function size 2048/3 as the threshold is conservative enough. */
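/* Worked example (added for clarity, not in the original): a function
   whose insns total func_size = 700 bytes could carry up to roughly
   2 * 700 bytes of constant pool as well, so 700 * 3 = 2100 >= 2048
   and we conservatively record that a far jump may be needed.  */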
23937 if (far_jump)
23939 if ((func_size * 3) >= 2048)
23941 /* Record the fact that we have decided that
23942 the function does use far jumps. */
23943 cfun->machine->far_jump_used = 1;
23944 return 1;
23948 return 0;
23951 /* Return nonzero if FUNC must be entered in ARM mode. */
23953 is_called_in_ARM_mode (tree func)
23955 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23957 /* Ignore the problem about functions whose address is taken. */
23958 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23959 return TRUE;
23961 #ifdef ARM_PE
23962 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23963 #else
23964 return FALSE;
23965 #endif
23968 /* Given the stack offsets and register mask in OFFSETS, decide how
23969 many additional registers to push instead of subtracting a constant
23970 from SP. For epilogues the principle is the same except we use pop.
23971 FOR_PROLOGUE indicates which we're generating. */
23972 static int
23973 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23975 HOST_WIDE_INT amount;
23976 unsigned long live_regs_mask = offsets->saved_regs_mask;
23977 /* Extract a mask of the ones we can give to the Thumb's push/pop
23978 instruction. */
23979 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23980 /* Then count how many other high registers will need to be pushed. */
23981 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23982 int n_free, reg_base, size;
23984 if (!for_prologue && frame_pointer_needed)
23985 amount = offsets->locals_base - offsets->saved_regs;
23986 else
23987 amount = offsets->outgoing_args - offsets->saved_regs;
23989 /* If the stack frame size is 512 exactly, we can save one load
23990 instruction, which should make this a win even when optimizing
23991 for speed. */
23992 if (!optimize_size && amount != 512)
23993 return 0;
23995 /* Can't do this if there are high registers to push. */
23996 if (high_regs_pushed != 0)
23997 return 0;
23999 /* Shouldn't do it in the prologue if no registers would normally
24000 be pushed at all. In the epilogue, also allow it if we'll have
24001 a pop insn for the PC. */
24002 if (l_mask == 0
24003 && (for_prologue
24004 || TARGET_BACKTRACE
24005 || (live_regs_mask & 1 << LR_REGNUM) == 0
24006 || TARGET_INTERWORK
24007 || crtl->args.pretend_args_size != 0))
24008 return 0;
24010 /* Don't do this if thumb_expand_prologue wants to emit instructions
24011 between the push and the stack frame allocation. */
24012 if (for_prologue
24013 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24014 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24015 return 0;
24017 reg_base = 0;
24018 n_free = 0;
24019 if (!for_prologue)
24021 size = arm_size_return_regs ();
24022 reg_base = ARM_NUM_INTS (size);
24023 live_regs_mask >>= reg_base;
24026 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24027 && (for_prologue || call_used_regs[reg_base + n_free]))
24029 live_regs_mask >>= 1;
24030 n_free++;
24033 if (n_free == 0)
24034 return 0;
24035 gcc_assert (amount / 4 * 4 == amount);
24037 if (amount >= 512 && (amount - n_free * 4) < 512)
24038 return (amount - 508) / 4;
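/* Rough illustration (added, not in the original): with amount == 516
   and n_free == 3 the test above holds (516 - 12 < 512), and we push
   (516 - 508) / 4 == 2 extra registers, leaving a 508-byte adjustment
   that still fits a single Thumb-1 add/sub of SP.  */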
24039 if (amount <= n_free * 4)
24040 return amount / 4;
24041 return 0;
24044 /* The bits which aren't usefully expanded as rtl. */
24045 const char *
24046 thumb1_unexpanded_epilogue (void)
24048 arm_stack_offsets *offsets;
24049 int regno;
24050 unsigned long live_regs_mask = 0;
24051 int high_regs_pushed = 0;
24052 int extra_pop;
24053 int had_to_push_lr;
24054 int size;
24056 if (cfun->machine->return_used_this_function != 0)
24057 return "";
24059 if (IS_NAKED (arm_current_func_type ()))
24060 return "";
24062 offsets = arm_get_frame_offsets ();
24063 live_regs_mask = offsets->saved_regs_mask;
24064 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24066 /* Deduce the registers used from the function's return value.
24067 This is more reliable than examining df_regs_ever_live_p () because that
24068 will be set if the register is ever used in the function, not just if
24069 the register is used to hold a return value. */
24070 size = arm_size_return_regs ();
24072 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24073 if (extra_pop > 0)
24075 unsigned long extra_mask = (1 << extra_pop) - 1;
24076 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24079 /* The prologue may have pushed some high registers to use as
24080 work registers. For example, the testsuite file:
24081 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24082 compiles to produce:
24083 push {r4, r5, r6, r7, lr}
24084 mov r7, r9
24085 mov r6, r8
24086 push {r6, r7}
24087 as part of the prologue. We have to undo that pushing here. */
24089 if (high_regs_pushed)
24091 unsigned long mask = live_regs_mask & 0xff;
24092 int next_hi_reg;
24094 /* The available low registers depend on the size of the value we are
24095 returning. */
24096 if (size <= 12)
24097 mask |= 1 << 3;
24098 if (size <= 8)
24099 mask |= 1 << 2;
24101 if (mask == 0)
24102 /* Oh dear! We have no low registers into which we can pop
24103 high registers! */
24104 internal_error
24105 ("no low registers available for popping high registers");
24107 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24108 if (live_regs_mask & (1 << next_hi_reg))
24109 break;
24111 while (high_regs_pushed)
24113 /* Find lo register(s) into which the high register(s) can
24114 be popped. */
24115 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24117 if (mask & (1 << regno))
24118 high_regs_pushed--;
24119 if (high_regs_pushed == 0)
24120 break;
24123 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24125 /* Pop the values into the low register(s). */
24126 thumb_pop (asm_out_file, mask);
24128 /* Move the value(s) into the high registers. */
24129 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24131 if (mask & (1 << regno))
24133 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24134 regno);
24136 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24137 if (live_regs_mask & (1 << next_hi_reg))
24138 break;
24142 live_regs_mask &= ~0x0f00;
24145 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24146 live_regs_mask &= 0xff;
24148 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24150 /* Pop the return address into the PC. */
24151 if (had_to_push_lr)
24152 live_regs_mask |= 1 << PC_REGNUM;
24154 /* Either no argument registers were pushed or a backtrace
24155 structure was created which includes an adjusted stack
24156 pointer, so just pop everything. */
24157 if (live_regs_mask)
24158 thumb_pop (asm_out_file, live_regs_mask);
24160 /* We have either just popped the return address into the
24161 PC or it was kept in LR for the entire function.
24162 Note that thumb_pop has already called thumb_exit if the
24163 PC was in the list. */
24164 if (!had_to_push_lr)
24165 thumb_exit (asm_out_file, LR_REGNUM);
24167 else
24169 /* Pop everything but the return address. */
24170 if (live_regs_mask)
24171 thumb_pop (asm_out_file, live_regs_mask);
24173 if (had_to_push_lr)
24175 if (size > 12)
24177 /* We have no free low regs, so save one. */
24178 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24179 LAST_ARG_REGNUM);
24182 /* Get the return address into a temporary register. */
24183 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24185 if (size > 12)
24187 /* Move the return address to lr. */
24188 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24189 LAST_ARG_REGNUM);
24190 /* Restore the low register. */
24191 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24192 IP_REGNUM);
24193 regno = LR_REGNUM;
24195 else
24196 regno = LAST_ARG_REGNUM;
24198 else
24199 regno = LR_REGNUM;
24201 /* Remove the argument registers that were pushed onto the stack. */
24202 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24203 SP_REGNUM, SP_REGNUM,
24204 crtl->args.pretend_args_size);
24206 thumb_exit (asm_out_file, regno);
24209 return "";
24212 /* Functions to save and restore machine-specific function data. */
24213 static struct machine_function *
24214 arm_init_machine_status (void)
24216 struct machine_function *machine;
24217 machine = ggc_cleared_alloc<machine_function> ();
24219 #if ARM_FT_UNKNOWN != 0
24220 machine->func_type = ARM_FT_UNKNOWN;
24221 #endif
24222 return machine;
24225 /* Return an RTX indicating where the return address to the
24226 calling function can be found. */
24228 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24230 if (count != 0)
24231 return NULL_RTX;
24233 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24236 /* Do anything needed before RTL is emitted for each function. */
24237 void
24238 arm_init_expanders (void)
24240 /* Arrange to initialize and mark the machine per-function status. */
24241 init_machine_status = arm_init_machine_status;
24243 /* This is to stop the combine pass optimizing away the alignment
24244 adjustment of va_arg. */
24245 /* ??? It is claimed that this should not be necessary. */
24246 if (cfun)
24247 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24251 /* Like arm_compute_initial_elimination offset. Simpler because there
24252 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24253 to point at the base of the local variables after static stack
24254 space for a function has been allocated. */
24256 HOST_WIDE_INT
24257 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24259 arm_stack_offsets *offsets;
24261 offsets = arm_get_frame_offsets ();
24263 switch (from)
24265 case ARG_POINTER_REGNUM:
24266 switch (to)
24268 case STACK_POINTER_REGNUM:
24269 return offsets->outgoing_args - offsets->saved_args;
24271 case FRAME_POINTER_REGNUM:
24272 return offsets->soft_frame - offsets->saved_args;
24274 case ARM_HARD_FRAME_POINTER_REGNUM:
24275 return offsets->saved_regs - offsets->saved_args;
24277 case THUMB_HARD_FRAME_POINTER_REGNUM:
24278 return offsets->locals_base - offsets->saved_args;
24280 default:
24281 gcc_unreachable ();
24283 break;
24285 case FRAME_POINTER_REGNUM:
24286 switch (to)
24288 case STACK_POINTER_REGNUM:
24289 return offsets->outgoing_args - offsets->soft_frame;
24291 case ARM_HARD_FRAME_POINTER_REGNUM:
24292 return offsets->saved_regs - offsets->soft_frame;
24294 case THUMB_HARD_FRAME_POINTER_REGNUM:
24295 return offsets->locals_base - offsets->soft_frame;
24297 default:
24298 gcc_unreachable ();
24300 break;
24302 default:
24303 gcc_unreachable ();
24307 /* Generate the function's prologue. */
24309 void
24310 thumb1_expand_prologue (void)
24312 rtx_insn *insn;
24314 HOST_WIDE_INT amount;
24315 arm_stack_offsets *offsets;
24316 unsigned long func_type;
24317 int regno;
24318 unsigned long live_regs_mask;
24319 unsigned long l_mask;
24320 unsigned high_regs_pushed = 0;
24322 func_type = arm_current_func_type ();
24324 /* Naked functions don't have prologues. */
24325 if (IS_NAKED (func_type))
24326 return;
24328 if (IS_INTERRUPT (func_type))
24330 error ("interrupt Service Routines cannot be coded in Thumb mode");
24331 return;
24334 if (is_called_in_ARM_mode (current_function_decl))
24335 emit_insn (gen_prologue_thumb1_interwork ());
24337 offsets = arm_get_frame_offsets ();
24338 live_regs_mask = offsets->saved_regs_mask;
24340 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24341 l_mask = live_regs_mask & 0x40ff;
24342 /* Then count how many other high registers will need to be pushed. */
24343 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24345 if (crtl->args.pretend_args_size)
24347 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24349 if (cfun->machine->uses_anonymous_args)
24351 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24352 unsigned long mask;
24354 mask = 1ul << (LAST_ARG_REGNUM + 1);
24355 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24357 insn = thumb1_emit_multi_reg_push (mask, 0);
24359 else
24361 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24362 stack_pointer_rtx, x));
24364 RTX_FRAME_RELATED_P (insn) = 1;
24367 if (TARGET_BACKTRACE)
24369 HOST_WIDE_INT offset = 0;
24370 unsigned work_register;
24371 rtx work_reg, x, arm_hfp_rtx;
24373 /* We have been asked to create a stack backtrace structure.
24374 The code looks like this:
24376 0 .align 2
24377 0 func:
24378 0 sub SP, #16 Reserve space for 4 registers.
24379 2 push {R7} Push low registers.
24380 4 add R7, SP, #20 Get the stack pointer before the push.
24381 6 str R7, [SP, #8] Store the stack pointer
24382 (before reserving the space).
24383 8 mov R7, PC Get hold of the start of this code + 12.
24384 10 str R7, [SP, #16] Store it.
24385 12 mov R7, FP Get hold of the current frame pointer.
24386 14 str R7, [SP, #4] Store it.
24387 16 mov R7, LR Get hold of the current return address.
24388 18 str R7, [SP, #12] Store it.
24389 20 add R7, SP, #16 Point at the start of the
24390 backtrace structure.
24391 22 mov FP, R7 Put this value into the frame pointer. */
24393 work_register = thumb_find_work_register (live_regs_mask);
24394 work_reg = gen_rtx_REG (SImode, work_register);
24395 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24397 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24398 stack_pointer_rtx, GEN_INT (-16)));
24399 RTX_FRAME_RELATED_P (insn) = 1;
24401 if (l_mask)
24403 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24404 RTX_FRAME_RELATED_P (insn) = 1;
24406 offset = bit_count (l_mask) * UNITS_PER_WORD;
24409 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24410 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24412 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24413 x = gen_frame_mem (SImode, x);
24414 emit_move_insn (x, work_reg);
24416 /* Make sure that the instruction fetching the PC is in the right place
24417 to calculate "start of backtrace creation code + 12". */
24418 /* ??? The stores using the common WORK_REG ought to be enough to
24419 prevent the scheduler from doing anything weird. Failing that
24420 we could always move all of the following into an UNSPEC_VOLATILE. */
24421 if (l_mask)
24423 x = gen_rtx_REG (SImode, PC_REGNUM);
24424 emit_move_insn (work_reg, x);
24426 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24427 x = gen_frame_mem (SImode, x);
24428 emit_move_insn (x, work_reg);
24430 emit_move_insn (work_reg, arm_hfp_rtx);
24432 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24433 x = gen_frame_mem (SImode, x);
24434 emit_move_insn (x, work_reg);
24436 else
24438 emit_move_insn (work_reg, arm_hfp_rtx);
24440 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24441 x = gen_frame_mem (SImode, x);
24442 emit_move_insn (x, work_reg);
24444 x = gen_rtx_REG (SImode, PC_REGNUM);
24445 emit_move_insn (work_reg, x);
24447 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24448 x = gen_frame_mem (SImode, x);
24449 emit_move_insn (x, work_reg);
24452 x = gen_rtx_REG (SImode, LR_REGNUM);
24453 emit_move_insn (work_reg, x);
24455 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24456 x = gen_frame_mem (SImode, x);
24457 emit_move_insn (x, work_reg);
24459 x = GEN_INT (offset + 12);
24460 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24462 emit_move_insn (arm_hfp_rtx, work_reg);
24464 /* Optimization: If we are not pushing any low registers but we are going
24465 to push some high registers then delay our first push. This will just
24466 be a push of LR and we can combine it with the push of the first high
24467 register. */
24468 else if ((l_mask & 0xff) != 0
24469 || (high_regs_pushed == 0 && l_mask))
24471 unsigned long mask = l_mask;
24472 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24473 insn = thumb1_emit_multi_reg_push (mask, mask);
24474 RTX_FRAME_RELATED_P (insn) = 1;
24477 if (high_regs_pushed)
24479 unsigned pushable_regs;
24480 unsigned next_hi_reg;
24481 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24482 : crtl->args.info.nregs;
24483 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24485 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24486 if (live_regs_mask & (1 << next_hi_reg))
24487 break;
24489 /* Here we need to mask out registers used for passing arguments,
24490 even if they could be pushed. This is to avoid using them to stash
24491 the high registers; doing so could clobber argument values still in use. */
24492 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24494 if (pushable_regs == 0)
24495 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24497 while (high_regs_pushed > 0)
24499 unsigned long real_regs_mask = 0;
24501 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24503 if (pushable_regs & (1 << regno))
24505 emit_move_insn (gen_rtx_REG (SImode, regno),
24506 gen_rtx_REG (SImode, next_hi_reg));
24508 high_regs_pushed --;
24509 real_regs_mask |= (1 << next_hi_reg);
24511 if (high_regs_pushed)
24513 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24514 next_hi_reg --)
24515 if (live_regs_mask & (1 << next_hi_reg))
24516 break;
24518 else
24520 pushable_regs &= ~((1 << regno) - 1);
24521 break;
24526 /* If we had to find a work register and we have not yet
24527 saved the LR then add it to the list of regs to push. */
24528 if (l_mask == (1 << LR_REGNUM))
24530 pushable_regs |= l_mask;
24531 real_regs_mask |= l_mask;
24532 l_mask = 0;
24535 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24536 RTX_FRAME_RELATED_P (insn) = 1;
24540 /* Load the pic register before setting the frame pointer,
24541 so we can use r7 as a temporary work register. */
24542 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24543 arm_load_pic_register (live_regs_mask);
24545 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24546 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24547 stack_pointer_rtx);
24549 if (flag_stack_usage_info)
24550 current_function_static_stack_size
24551 = offsets->outgoing_args - offsets->saved_args;
24553 amount = offsets->outgoing_args - offsets->saved_regs;
24554 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24555 if (amount)
24557 if (amount < 512)
24559 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24560 GEN_INT (- amount)));
24561 RTX_FRAME_RELATED_P (insn) = 1;
24563 else
24565 rtx reg, dwarf;
24567 /* The stack decrement is too big for an immediate value in a single
24568 insn. In theory we could issue multiple subtracts, but after
24569 three of them it becomes more space efficient to place the full
24570 value in the constant pool and load into a register. (Also the
24571 ARM debugger really likes to see only one stack decrement per
24572 function). So instead we look for a scratch register into which
24573 we can load the decrement, and then we subtract this from the
24574 stack pointer. Unfortunately, on Thumb the only available
24575 scratch registers are the argument registers, and we cannot use
24576 these as they may hold arguments to the function. Instead we
24577 attempt to locate a call preserved register which is used by this
24578 function. If we can find one, then we know that it will have
24579 been pushed at the start of the prologue and so we can corrupt
24580 it now. */
24581 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24582 if (live_regs_mask & (1 << regno))
24583 break;
24585 gcc_assert (regno <= LAST_LO_REGNUM);
24587 reg = gen_rtx_REG (SImode, regno);
24589 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24591 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24592 stack_pointer_rtx, reg));
24594 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24595 plus_constant (Pmode, stack_pointer_rtx,
24596 -amount));
24597 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24598 RTX_FRAME_RELATED_P (insn) = 1;
24602 if (frame_pointer_needed)
24603 thumb_set_frame_pointer (offsets);
24605 /* If we are profiling, make sure no instructions are scheduled before
24606 the call to mcount. Similarly if the user has requested no
24607 scheduling in the prolog. Similarly if we want non-call exceptions
24608 using the EABI unwinder, to prevent faulting instructions from being
24609 swapped with a stack adjustment. */
24610 if (crtl->profile || !TARGET_SCHED_PROLOG
24611 || (arm_except_unwind_info (&global_options) == UI_TARGET
24612 && cfun->can_throw_non_call_exceptions))
24613 emit_insn (gen_blockage ());
24615 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24616 if (live_regs_mask & 0xff)
24617 cfun->machine->lr_save_eliminated = 0;
24620 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
24621 POP instruction can be generated. LR should be replaced by PC. All
24622 the checks required are already done by USE_RETURN_INSN (). Hence,
24623 all we really need to check here is if single register is to be
24624 returned, or multiple register return. */
24625 void
24626 thumb2_expand_return (bool simple_return)
24628 int i, num_regs;
24629 unsigned long saved_regs_mask;
24630 arm_stack_offsets *offsets;
24632 offsets = arm_get_frame_offsets ();
24633 saved_regs_mask = offsets->saved_regs_mask;
24635 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24636 if (saved_regs_mask & (1 << i))
24637 num_regs++;
24639 if (!simple_return && saved_regs_mask)
24641 if (num_regs == 1)
24643 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24644 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24645 rtx addr = gen_rtx_MEM (SImode,
24646 gen_rtx_POST_INC (SImode,
24647 stack_pointer_rtx));
24648 set_mem_alias_set (addr, get_frame_alias_set ());
24649 XVECEXP (par, 0, 0) = ret_rtx;
24650 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24651 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24652 emit_jump_insn (par);
24654 else
24656 saved_regs_mask &= ~ (1 << LR_REGNUM);
24657 saved_regs_mask |= (1 << PC_REGNUM);
24658 arm_emit_multi_reg_pop (saved_regs_mask);
24661 else
24663 emit_jump_insn (simple_return_rtx);
24667 void
24668 thumb1_expand_epilogue (void)
24670 HOST_WIDE_INT amount;
24671 arm_stack_offsets *offsets;
24672 int regno;
24674 /* Naked functions don't have epilogues. */
24675 if (IS_NAKED (arm_current_func_type ()))
24676 return;
24678 offsets = arm_get_frame_offsets ();
24679 amount = offsets->outgoing_args - offsets->saved_regs;
24681 if (frame_pointer_needed)
24683 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24684 amount = offsets->locals_base - offsets->saved_regs;
24686 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24688 gcc_assert (amount >= 0);
24689 if (amount)
24691 emit_insn (gen_blockage ());
24693 if (amount < 512)
24694 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24695 GEN_INT (amount)));
24696 else
24698 /* r3 is always free in the epilogue. */
24699 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24701 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24702 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24706 /* Emit a USE (stack_pointer_rtx), so that
24707 the stack adjustment will not be deleted. */
24708 emit_insn (gen_force_register_use (stack_pointer_rtx));
24710 if (crtl->profile || !TARGET_SCHED_PROLOG)
24711 emit_insn (gen_blockage ());
24713 /* Emit a clobber for each insn that will be restored in the epilogue,
24714 so that flow2 will get register lifetimes correct. */
24715 for (regno = 0; regno < 13; regno++)
24716 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24717 emit_clobber (gen_rtx_REG (SImode, regno));
24719 if (! df_regs_ever_live_p (LR_REGNUM))
24720 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24723 /* Epilogue code for APCS frame. */
24724 static void
24725 arm_expand_epilogue_apcs_frame (bool really_return)
24727 unsigned long func_type;
24728 unsigned long saved_regs_mask;
24729 int num_regs = 0;
24730 int i;
24731 int floats_from_frame = 0;
24732 arm_stack_offsets *offsets;
24734 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24735 func_type = arm_current_func_type ();
24737 /* Get frame offsets for ARM. */
24738 offsets = arm_get_frame_offsets ();
24739 saved_regs_mask = offsets->saved_regs_mask;
24741 /* Find the offset of the floating-point save area in the frame. */
24742 floats_from_frame
24743 = (offsets->saved_args
24744 + arm_compute_static_chain_stack_bytes ()
24745 - offsets->frame);
24747 /* Compute how many core registers saved and how far away the floats are. */
24748 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24749 if (saved_regs_mask & (1 << i))
24751 num_regs++;
24752 floats_from_frame += 4;
24755 if (TARGET_HARD_FLOAT && TARGET_VFP)
24757 int start_reg;
24758 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24760 /* The offset is from IP_REGNUM. */
24761 int saved_size = arm_get_vfp_saved_size ();
24762 if (saved_size > 0)
24764 rtx_insn *insn;
24765 floats_from_frame += saved_size;
24766 insn = emit_insn (gen_addsi3 (ip_rtx,
24767 hard_frame_pointer_rtx,
24768 GEN_INT (-floats_from_frame)));
24769 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24770 ip_rtx, hard_frame_pointer_rtx);
24773 /* Generate VFP register multi-pop. */
24774 start_reg = FIRST_VFP_REGNUM;
24776 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24777 /* Look for a case where a reg does not need restoring. */
24778 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24779 && (!df_regs_ever_live_p (i + 1)
24780 || call_used_regs[i + 1]))
24782 if (start_reg != i)
24783 arm_emit_vfp_multi_reg_pop (start_reg,
24784 (i - start_reg) / 2,
24785 gen_rtx_REG (SImode,
24786 IP_REGNUM));
24787 start_reg = i + 2;
24790 /* Restore the remaining regs that we have discovered (or possibly
24791 even all of them, if the conditional in the for loop never
24792 fired). */
24793 if (start_reg != i)
24794 arm_emit_vfp_multi_reg_pop (start_reg,
24795 (i - start_reg) / 2,
24796 gen_rtx_REG (SImode, IP_REGNUM));
24799 if (TARGET_IWMMXT)
24801 /* The frame pointer is guaranteed to be non-double-word aligned, as
24802 it is set to double-word-aligned old_stack_pointer - 4. */
24803 rtx_insn *insn;
24804 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24806 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24807 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24809 rtx addr = gen_frame_mem (V2SImode,
24810 plus_constant (Pmode, hard_frame_pointer_rtx,
24811 - lrm_count * 4));
24812 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24813 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24814 gen_rtx_REG (V2SImode, i),
24815 NULL_RTX);
24816 lrm_count += 2;
24820 /* saved_regs_mask should contain IP, which holds the old stack pointer
24821 saved at the time the frame was created. Since SP and IP are adjacent
24822 registers, we can restore the value directly into SP. */
24823 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24824 saved_regs_mask &= ~(1 << IP_REGNUM);
24825 saved_regs_mask |= (1 << SP_REGNUM);
24827 /* There are two registers left in saved_regs_mask - LR and PC. We
24828 only need to restore LR (the return address), but to
24829 save time we can load it directly into PC, unless we need a
24830 special function exit sequence, or we are not really returning. */
24831 if (really_return
24832 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24833 && !crtl->calls_eh_return)
24834 /* Delete LR from the register mask, so that LR on
24835 the stack is loaded into the PC in the register mask. */
24836 saved_regs_mask &= ~(1 << LR_REGNUM);
24837 else
24838 saved_regs_mask &= ~(1 << PC_REGNUM);
24840 num_regs = bit_count (saved_regs_mask);
24841 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24843 rtx_insn *insn;
24844 emit_insn (gen_blockage ());
24845 /* Unwind the stack to just below the saved registers. */
24846 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24847 hard_frame_pointer_rtx,
24848 GEN_INT (- 4 * num_regs)));
24850 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24851 stack_pointer_rtx, hard_frame_pointer_rtx);
24854 arm_emit_multi_reg_pop (saved_regs_mask);
24856 if (IS_INTERRUPT (func_type))
24858 /* Interrupt handlers will have pushed the
24859 IP onto the stack, so restore it now. */
24860 rtx_insn *insn;
24861 rtx addr = gen_rtx_MEM (SImode,
24862 gen_rtx_POST_INC (SImode,
24863 stack_pointer_rtx));
24864 set_mem_alias_set (addr, get_frame_alias_set ());
24865 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24866 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24867 gen_rtx_REG (SImode, IP_REGNUM),
24868 NULL_RTX);
24871 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24872 return;
24874 if (crtl->calls_eh_return)
24875 emit_insn (gen_addsi3 (stack_pointer_rtx,
24876 stack_pointer_rtx,
24877 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24879 if (IS_STACKALIGN (func_type))
24880 /* Restore the original stack pointer. Before prologue, the stack was
24881 realigned and the original stack pointer saved in r0. For details,
24882 see comment in arm_expand_prologue. */
24883 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24885 emit_jump_insn (simple_return_rtx);
24888 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24889 function is not a sibcall. */
24890 void
24891 arm_expand_epilogue (bool really_return)
24893 unsigned long func_type;
24894 unsigned long saved_regs_mask;
24895 int num_regs = 0;
24896 int i;
24897 int amount;
24898 arm_stack_offsets *offsets;
24900 func_type = arm_current_func_type ();
24902 /* Naked functions don't have epilogues. Hence, generate the return pattern
24903 and let output_return_instruction take care of any instruction emission. */
24904 if (IS_NAKED (func_type)
24905 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24907 if (really_return)
24908 emit_jump_insn (simple_return_rtx);
24909 return;
24912 /* If we are throwing an exception, then we really must be doing a
24913 return, so we can't tail-call. */
24914 gcc_assert (!crtl->calls_eh_return || really_return);
24916 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24918 arm_expand_epilogue_apcs_frame (really_return);
24919 return;
24922 /* Get frame offsets for ARM. */
24923 offsets = arm_get_frame_offsets ();
24924 saved_regs_mask = offsets->saved_regs_mask;
24925 num_regs = bit_count (saved_regs_mask);
24927 if (frame_pointer_needed)
24929 rtx_insn *insn;
24930 /* Restore stack pointer if necessary. */
24931 if (TARGET_ARM)
24933 /* In ARM mode, the frame pointer points to the first saved register.
24934 Restore the stack pointer to the last saved register. */
24935 amount = offsets->frame - offsets->saved_regs;
24937 /* Force out any pending memory operations that reference stacked data
24938 before stack de-allocation occurs. */
24939 emit_insn (gen_blockage ());
24940 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24941 hard_frame_pointer_rtx,
24942 GEN_INT (amount)));
24943 arm_add_cfa_adjust_cfa_note (insn, amount,
24944 stack_pointer_rtx,
24945 hard_frame_pointer_rtx);
24947 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24948 deleted. */
24949 emit_insn (gen_force_register_use (stack_pointer_rtx));
24951 else
24953 /* In Thumb-2 mode, the frame pointer points to the last saved
24954 register. */
24955 amount = offsets->locals_base - offsets->saved_regs;
24956 if (amount)
24958 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24959 hard_frame_pointer_rtx,
24960 GEN_INT (amount)));
24961 arm_add_cfa_adjust_cfa_note (insn, amount,
24962 hard_frame_pointer_rtx,
24963 hard_frame_pointer_rtx);
24966 /* Force out any pending memory operations that reference stacked data
24967 before stack de-allocation occurs. */
24968 emit_insn (gen_blockage ());
24969 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24970 hard_frame_pointer_rtx));
24971 arm_add_cfa_adjust_cfa_note (insn, 0,
24972 stack_pointer_rtx,
24973 hard_frame_pointer_rtx);
24974 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24975 deleted. */
24976 emit_insn (gen_force_register_use (stack_pointer_rtx));
24979 else
24981 /* Pop off outgoing args and local frame to adjust stack pointer to
24982 last saved register. */
24983 amount = offsets->outgoing_args - offsets->saved_regs;
24984 if (amount)
24986 rtx_insn *tmp;
24987 /* Force out any pending memory operations that reference stacked data
24988 before stack de-allocation occurs. */
24989 emit_insn (gen_blockage ());
24990 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24991 stack_pointer_rtx,
24992 GEN_INT (amount)));
24993 arm_add_cfa_adjust_cfa_note (tmp, amount,
24994 stack_pointer_rtx, stack_pointer_rtx);
24995 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24996 not deleted. */
24997 emit_insn (gen_force_register_use (stack_pointer_rtx));
25001 if (TARGET_HARD_FLOAT && TARGET_VFP)
25003 /* Generate VFP register multi-pop. */
25004 int end_reg = LAST_VFP_REGNUM + 1;
25006 /* Scan the registers in reverse order. We need to match
25007 any groupings made in the prologue and generate matching
25008 vldm operations. The need to match groups is because,
25009 unlike pop, vldm can only restore consecutive registers. */
25010 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25011 /* Look for a case where a reg does not need restoring. */
25012 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25013 && (!df_regs_ever_live_p (i + 1)
25014 || call_used_regs[i + 1]))
25016 /* Restore the regs discovered so far (from reg+2 to
25017 end_reg). */
25018 if (end_reg > i + 2)
25019 arm_emit_vfp_multi_reg_pop (i + 2,
25020 (end_reg - (i + 2)) / 2,
25021 stack_pointer_rtx);
25022 end_reg = i;
25025 /* Restore the remaining regs that we have discovered (or possibly
25026 even all of them, if the conditional in the for loop never
25027 fired). */
25028 if (end_reg > i + 2)
25029 arm_emit_vfp_multi_reg_pop (i + 2,
25030 (end_reg - (i + 2)) / 2,
25031 stack_pointer_rtx);
25034 if (TARGET_IWMMXT)
25035 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25036 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25038 rtx_insn *insn;
25039 rtx addr = gen_rtx_MEM (V2SImode,
25040 gen_rtx_POST_INC (SImode,
25041 stack_pointer_rtx));
25042 set_mem_alias_set (addr, get_frame_alias_set ());
25043 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25044 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25045 gen_rtx_REG (V2SImode, i),
25046 NULL_RTX);
25047 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25048 stack_pointer_rtx, stack_pointer_rtx);
25051 if (saved_regs_mask)
25053 rtx insn;
25054 bool return_in_pc = false;
25056 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25057 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25058 && !IS_STACKALIGN (func_type)
25059 && really_return
25060 && crtl->args.pretend_args_size == 0
25061 && saved_regs_mask & (1 << LR_REGNUM)
25062 && !crtl->calls_eh_return)
25064 saved_regs_mask &= ~(1 << LR_REGNUM);
25065 saved_regs_mask |= (1 << PC_REGNUM);
25066 return_in_pc = true;
25069 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25071 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25072 if (saved_regs_mask & (1 << i))
25074 rtx addr = gen_rtx_MEM (SImode,
25075 gen_rtx_POST_INC (SImode,
25076 stack_pointer_rtx));
25077 set_mem_alias_set (addr, get_frame_alias_set ());
25079 if (i == PC_REGNUM)
25081 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25082 XVECEXP (insn, 0, 0) = ret_rtx;
25083 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
25084 gen_rtx_REG (SImode, i),
25085 addr);
25086 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25087 insn = emit_jump_insn (insn);
25089 else
25091 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25092 addr));
25093 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25094 gen_rtx_REG (SImode, i),
25095 NULL_RTX);
25096 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25097 stack_pointer_rtx,
25098 stack_pointer_rtx);
25102 else
25104 if (TARGET_LDRD
25105 && current_tune->prefer_ldrd_strd
25106 && !optimize_function_for_size_p (cfun))
25108 if (TARGET_THUMB2)
25109 thumb2_emit_ldrd_pop (saved_regs_mask);
25110 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25111 arm_emit_ldrd_pop (saved_regs_mask);
25112 else
25113 arm_emit_multi_reg_pop (saved_regs_mask);
25115 else
25116 arm_emit_multi_reg_pop (saved_regs_mask);
25119 if (return_in_pc == true)
25120 return;
25123 if (crtl->args.pretend_args_size)
25125 int i, j;
25126 rtx dwarf = NULL_RTX;
25127 rtx_insn *tmp =
25128 emit_insn (gen_addsi3 (stack_pointer_rtx,
25129 stack_pointer_rtx,
25130 GEN_INT (crtl->args.pretend_args_size)));
25132 RTX_FRAME_RELATED_P (tmp) = 1;
25134 if (cfun->machine->uses_anonymous_args)
25136 /* Restore pretend args. Refer to arm_expand_prologue for how
25137 pretend_args are saved on the stack. */
25138 int num_regs = crtl->args.pretend_args_size / 4;
25139 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
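/* Added example (not in the original): with pretend_args_size == 8 we
   get num_regs == 2 and saved_regs_mask == (0xf0 >> 2) & 0xf == 0xc,
   i.e. r2 and r3, the two argument registers that were pushed.  */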
25140 for (j = 0, i = 0; j < num_regs; i++)
25141 if (saved_regs_mask & (1 << i))
25143 rtx reg = gen_rtx_REG (SImode, i);
25144 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25145 j++;
25147 REG_NOTES (tmp) = dwarf;
25149 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25150 stack_pointer_rtx, stack_pointer_rtx);
25153 if (!really_return)
25154 return;
25156 if (crtl->calls_eh_return)
25157 emit_insn (gen_addsi3 (stack_pointer_rtx,
25158 stack_pointer_rtx,
25159 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25161 if (IS_STACKALIGN (func_type))
25162 /* Restore the original stack pointer. Before prologue, the stack was
25163 realigned and the original stack pointer saved in r0. For details,
25164 see comment in arm_expand_prologue. */
25165 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
25167 emit_jump_insn (simple_return_rtx);
25170 /* Implementation of insn prologue_thumb1_interwork. This is the first
25171 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25173 const char *
25174 thumb1_output_interwork (void)
25176 const char * name;
25177 FILE *f = asm_out_file;
25179 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25180 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25181 == SYMBOL_REF);
25182 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25184 /* Generate code sequence to switch us into Thumb mode. */
25185 /* The .code 32 directive has already been emitted by
25186 ASM_DECLARE_FUNCTION_NAME. */
25187 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25188 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25190 /* Generate a label, so that the debugger will notice the
25191 change in instruction sets. This label is also used by
25192 the assembler to bypass the ARM code when this function
25193 is called from a Thumb encoded function elsewhere in the
25194 same file. Hence the definition of STUB_NAME here must
25195 agree with the definition in gas/config/tc-arm.c. */
25197 #define STUB_NAME ".real_start_of"
25199 fprintf (f, "\t.code\t16\n");
25200 #ifdef ARM_PE
25201 if (arm_dllexport_name_p (name))
25202 name = arm_strip_name_encoding (name);
25203 #endif
25204 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25205 fprintf (f, "\t.thumb_func\n");
25206 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25208 return "";
25211 /* Handle the case of a double word load into a low register from
25212 a computed memory address. The computed address may involve a
25213 register which is overwritten by the load. */
25214 const char *
25215 thumb_load_double_from_address (rtx *operands)
25217 rtx addr;
25218 rtx base;
25219 rtx offset;
25220 rtx arg1;
25221 rtx arg2;
25223 gcc_assert (REG_P (operands[0]));
25224 gcc_assert (MEM_P (operands[1]));
25226 /* Get the memory address. */
25227 addr = XEXP (operands[1], 0);
25229 /* Work out how the memory address is computed. */
25230 switch (GET_CODE (addr))
25232 case REG:
25233 operands[2] = adjust_address (operands[1], SImode, 4);
25235 if (REGNO (operands[0]) == REGNO (addr))
25237 output_asm_insn ("ldr\t%H0, %2", operands);
25238 output_asm_insn ("ldr\t%0, %1", operands);
25240 else
25242 output_asm_insn ("ldr\t%0, %1", operands);
25243 output_asm_insn ("ldr\t%H0, %2", operands);
25245 break;
25247 case CONST:
25248 /* Compute <address> + 4 for the high order load. */
25249 operands[2] = adjust_address (operands[1], SImode, 4);
25251 output_asm_insn ("ldr\t%0, %1", operands);
25252 output_asm_insn ("ldr\t%H0, %2", operands);
25253 break;
25255 case PLUS:
25256 arg1 = XEXP (addr, 0);
25257 arg2 = XEXP (addr, 1);
25259 if (CONSTANT_P (arg1))
25260 base = arg2, offset = arg1;
25261 else
25262 base = arg1, offset = arg2;
25264 gcc_assert (REG_P (base));
25266 /* Catch the case of <address> = <reg> + <reg> */
25267 if (REG_P (offset))
25269 int reg_offset = REGNO (offset);
25270 int reg_base = REGNO (base);
25271 int reg_dest = REGNO (operands[0]);
25273 /* Add the base and offset registers together into the
25274 higher destination register. */
25275 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25276 reg_dest + 1, reg_base, reg_offset);
25278 /* Load the lower destination register from the address in
25279 the higher destination register. */
25280 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25281 reg_dest, reg_dest + 1);
25283 /* Load the higher destination register from its own address
25284 plus 4. */
25285 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25286 reg_dest + 1, reg_dest + 1);
25288 else
25290 /* Compute <address> + 4 for the high order load. */
25291 operands[2] = adjust_address (operands[1], SImode, 4);
25293 /* If the computed address is held in the low order register
25294 then load the high order register first, otherwise always
25295 load the low order register first. */
25296 if (REGNO (operands[0]) == REGNO (base))
25298 output_asm_insn ("ldr\t%H0, %2", operands);
25299 output_asm_insn ("ldr\t%0, %1", operands);
25301 else
25303 output_asm_insn ("ldr\t%0, %1", operands);
25304 output_asm_insn ("ldr\t%H0, %2", operands);
25307 break;
25309 case LABEL_REF:
25310 /* With no registers to worry about we can just load the value
25311 directly. */
25312 operands[2] = adjust_address (operands[1], SImode, 4);
25314 output_asm_insn ("ldr\t%H0, %2", operands);
25315 output_asm_insn ("ldr\t%0, %1", operands);
25316 break;
25318 default:
25319 gcc_unreachable ();
25322 return "";
25325 const char *
25326 thumb_output_move_mem_multiple (int n, rtx *operands)
25328 rtx tmp;
25330 switch (n)
25332 case 2:
25333 if (REGNO (operands[4]) > REGNO (operands[5]))
25335 tmp = operands[4];
25336 operands[4] = operands[5];
25337 operands[5] = tmp;
25339 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25340 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25341 break;
25343 case 3:
25344 if (REGNO (operands[4]) > REGNO (operands[5]))
25345 std::swap (operands[4], operands[5]);
25346 if (REGNO (operands[5]) > REGNO (operands[6]))
25347 std::swap (operands[5], operands[6]);
25348 if (REGNO (operands[4]) > REGNO (operands[5]))
25349 std::swap (operands[4], operands[5]);
25351 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25352 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25353 break;
25355 default:
25356 gcc_unreachable ();
25359 return "";
25362 /* Output a call-via instruction for thumb state. */
25363 const char *
25364 thumb_call_via_reg (rtx reg)
25366 int regno = REGNO (reg);
25367 rtx *labelp;
25369 gcc_assert (regno < LR_REGNUM);
25371 /* If we are in the normal text section we can use a single instance
25372 per compilation unit. If we are doing function sections, then we need
25373 an entry per section, since we can't rely on reachability. */
25374 if (in_section == text_section)
25376 thumb_call_reg_needed = 1;
25378 if (thumb_call_via_label[regno] == NULL)
25379 thumb_call_via_label[regno] = gen_label_rtx ();
25380 labelp = thumb_call_via_label + regno;
25382 else
25384 if (cfun->machine->call_via[regno] == NULL)
25385 cfun->machine->call_via[regno] = gen_label_rtx ();
25386 labelp = cfun->machine->call_via + regno;
25389 output_asm_insn ("bl\t%a0", labelp);
25390 return "";
25393 /* Routines for generating rtl. */
25394 void
25395 thumb_expand_movmemqi (rtx *operands)
25397 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25398 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25399 HOST_WIDE_INT len = INTVAL (operands[2]);
25400 HOST_WIDE_INT offset = 0;
25402 while (len >= 12)
25404 emit_insn (gen_movmem12b (out, in, out, in));
25405 len -= 12;
25408 if (len >= 8)
25410 emit_insn (gen_movmem8b (out, in, out, in));
25411 len -= 8;
25414 if (len >= 4)
25416 rtx reg = gen_reg_rtx (SImode);
25417 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25418 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25419 len -= 4;
25420 offset += 4;
25423 if (len >= 2)
25425 rtx reg = gen_reg_rtx (HImode);
25426 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25427 plus_constant (Pmode, in,
25428 offset))));
25429 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25430 offset)),
25431 reg));
25432 len -= 2;
25433 offset += 2;
25436 if (len)
25438 rtx reg = gen_reg_rtx (QImode);
25439 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25440 plus_constant (Pmode, in,
25441 offset))));
25442 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25443 offset)),
25444 reg));
25448 void
25449 thumb_reload_out_hi (rtx *operands)
25451 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25454 /* Handle reading a half-word from memory during reload. */
25455 void
25456 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25458 gcc_unreachable ();
25461 /* Return the length of a function name prefix
25462 that starts with the character 'c'. */
25463 static int
25464 arm_get_strip_length (int c)
25466 switch (c)
25468 ARM_NAME_ENCODING_LENGTHS
25469 default: return 0;
25473 /* Return a pointer to a function's name with any
25474 and all prefix encodings stripped from it. */
25475 const char *
25476 arm_strip_name_encoding (const char *name)
25478 int skip;
25480 while ((skip = arm_get_strip_length (* name)))
25481 name += skip;
25483 return name;
25486 /* If there is a '*' anywhere in the name's prefix, then
25487 emit the stripped name verbatim, otherwise prepend an
25488 underscore if leading underscores are being used. */
25489 void
25490 arm_asm_output_labelref (FILE *stream, const char *name)
25492 int skip;
25493 int verbatim = 0;
25495 while ((skip = arm_get_strip_length (* name)))
25497 verbatim |= (*name == '*');
25498 name += skip;
25501 if (verbatim)
25502 fputs (name, stream);
25503 else
25504 asm_fprintf (stream, "%U%s", name);
25507 /* This function is used to emit an EABI tag and its associated value.
25508 We emit the numerical value of the tag in case the assembler does not
25509 support textual tags (e.g. gas prior to 2.20). If requested we include
25510 the tag name in a comment so that anyone reading the assembler output
25511 will know which tag is being set.
25513 This function is not static because arm-c.c needs it too. */
25515 void
25516 arm_emit_eabi_attribute (const char *name, int num, int val)
25518 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25519 if (flag_verbose_asm || flag_debug_asm)
25520 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25521 asm_fprintf (asm_out_file, "\n");
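/* For example, a call such as
       arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 1);
   should produce a directive along the lines of
       .eabi_attribute 26, 1   @ Tag_ABI_enum_size
   where the trailing comment appears only under -fverbose-asm or -dA
   and uses whatever ASM_COMMENT_START expands to for the target.  */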
25524 static void
25525 arm_file_start (void)
25527 int val;
25529 if (TARGET_UNIFIED_ASM)
25530 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25532 if (TARGET_BPABI)
25534 const char *fpu_name;
25535 if (arm_selected_arch)
25537 /* armv7ve doesn't support any extensions. */
25538 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25540 /* Keep backward compatibility for assemblers
25541 which don't support armv7ve. */
25542 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25543 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25544 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25545 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25546 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25548 else
25550 const char* pos = strchr (arm_selected_arch->name, '+');
25551 if (pos)
25553 char buf[15];
25554 gcc_assert (strlen (arm_selected_arch->name)
25555 <= sizeof (buf) / sizeof (*pos));
25556 strncpy (buf, arm_selected_arch->name,
25557 (pos - arm_selected_arch->name) * sizeof (*pos));
25558 buf[pos - arm_selected_arch->name] = '\0';
25559 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25560 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25562 else
25563 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25566 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25567 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25568 else
25570 const char* truncated_name
25571 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25572 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25575 if (TARGET_SOFT_FLOAT)
25577 fpu_name = "softvfp";
25579 else
25581 fpu_name = arm_fpu_desc->name;
25582 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25584 if (TARGET_HARD_FLOAT)
25585 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
25586 if (TARGET_HARD_FLOAT_ABI)
25587 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25590 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25592 /* Some of these attributes only apply when the corresponding features
25593 are used. However we don't have any easy way of figuring this out.
25594 Conservatively record the setting that would have been used. */
25596 if (flag_rounding_math)
25597 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25599 if (!flag_unsafe_math_optimizations)
25601 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25602 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25604 if (flag_signaling_nans)
25605 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25607 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25608 flag_finite_math_only ? 1 : 3);
25610 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25611 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25612 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25613 flag_short_enums ? 1 : 2);
25615 /* Tag_ABI_optimization_goals. */
25616 if (optimize_size)
25617 val = 4;
25618 else if (optimize >= 2)
25619 val = 2;
25620 else if (optimize)
25621 val = 1;
25622 else
25623 val = 6;
25624 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
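  /* These values follow the EABI encoding of Tag_ABI_optimization_goals:
     -Os is recorded as aggressively optimized for size (4), -O2 and above
     as aggressively for speed (2), -O1 as for speed (1) and -O0 as
     aggressively for debugging (6).  */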
25626 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25627 unaligned_access);
25629 if (arm_fp16_format)
25630 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25631 (int) arm_fp16_format);
25633 if (arm_lang_output_object_attributes_hook)
25634 arm_lang_output_object_attributes_hook();
25637 default_file_start ();
25640 static void
25641 arm_file_end (void)
25643 int regno;
25645 if (NEED_INDICATE_EXEC_STACK)
25646 /* Add .note.GNU-stack. */
25647 file_end_indicate_exec_stack ();
25649 if (! thumb_call_reg_needed)
25650 return;
25652 switch_to_section (text_section);
25653 asm_fprintf (asm_out_file, "\t.code 16\n");
25654 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25656 for (regno = 0; regno < LR_REGNUM; regno++)
25658 rtx label = thumb_call_via_label[regno];
25660 if (label != 0)
25662 targetm.asm_out.internal_label (asm_out_file, "L",
25663 CODE_LABEL_NUMBER (label));
25664 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25669 #ifndef ARM_PE
25670 /* Symbols in the text segment can be accessed without indirecting via the
25671 constant pool; it may take an extra binary operation, but this is still
25672 faster than indirecting via memory. Don't do this when not optimizing,
25673 since we won't be calculating all of the offsets necessary to do this
25674 simplification. */
25676 static void
25677 arm_encode_section_info (tree decl, rtx rtl, int first)
25679 if (optimize > 0 && TREE_CONSTANT (decl))
25680 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25682 default_encode_section_info (decl, rtl, first);
25684 #endif /* !ARM_PE */
25686 static void
25687 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25689 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25690 && !strcmp (prefix, "L"))
25692 arm_ccfsm_state = 0;
25693 arm_target_insn = NULL;
25695 default_internal_label (stream, prefix, labelno);
25698 /* Output code to add DELTA to the first argument, and then jump
25699 to FUNCTION. Used for C++ multiple inheritance. */
25700 static void
25701 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25702 HOST_WIDE_INT delta,
25703 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25704 tree function)
25706 static int thunk_label = 0;
25707 char label[256];
25708 char labelpc[256];
25709 int mi_delta = delta;
25710 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25711 int shift = 0;
25712 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25713 ? 1 : 0);
25714 if (mi_delta < 0)
25715 mi_delta = - mi_delta;
25717 final_start_function (emit_barrier (), file, 1);
25719 if (TARGET_THUMB1)
25721 int labelno = thunk_label++;
25722 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25723 /* Thunks are entered in ARM mode when available. */
25724 if (TARGET_THUMB1_ONLY)
25726 /* push r3 so we can use it as a temporary. */
25727 /* TODO: Omit this save if r3 is not used. */
25728 fputs ("\tpush {r3}\n", file);
25729 fputs ("\tldr\tr3, ", file);
25731 else
25733 fputs ("\tldr\tr12, ", file);
25735 assemble_name (file, label);
25736 fputc ('\n', file);
25737 if (flag_pic)
25739 /* If we are generating PIC, the ldr instruction below loads
25740 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25741 the address of the add + 8, so we have:
25743 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25744 = target + 1.
25746 Note that we have "+ 1" because some versions of GNU ld
25747 don't set the low bit of the result for R_ARM_REL32
25748 relocations against thumb function symbols.
25749 On ARMv6M this is +4, not +8. */
25750 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25751 assemble_name (file, labelpc);
25752 fputs (":\n", file);
25753 if (TARGET_THUMB1_ONLY)
25755 /* This is 2 insns after the start of the thunk, so we know it
25756 is 4-byte aligned. */
25757 fputs ("\tadd\tr3, pc, r3\n", file);
25758 fputs ("\tmov r12, r3\n", file);
25760 else
25761 fputs ("\tadd\tr12, pc, r12\n", file);
25763 else if (TARGET_THUMB1_ONLY)
25764 fputs ("\tmov r12, r3\n", file);
25766 if (TARGET_THUMB1_ONLY)
25768 if (mi_delta > 255)
25770 fputs ("\tldr\tr3, ", file);
25771 assemble_name (file, label);
25772 fputs ("+4\n", file);
25773 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25774 mi_op, this_regno, this_regno);
25776 else if (mi_delta != 0)
25778 /* Thumb1 unified syntax requires an 's' suffix in the instruction
25779 name when one of the operands is an immediate. */
25780 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25781 mi_op, this_regno, this_regno,
25782 mi_delta);
25785 else
25787 /* TODO: Use movw/movt for large constants when available. */
25788 while (mi_delta != 0)
25790 if ((mi_delta & (3 << shift)) == 0)
25791 shift += 2;
25792 else
25794 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25795 mi_op, this_regno, this_regno,
25796 mi_delta & (0xff << shift));
25797 mi_delta &= ~(0xff << shift);
25798 shift += 8;
25802 if (TARGET_THUMB1)
25804 if (TARGET_THUMB1_ONLY)
25805 fputs ("\tpop\t{r3}\n", file);
25807 fprintf (file, "\tbx\tr12\n");
25808 ASM_OUTPUT_ALIGN (file, 2);
25809 assemble_name (file, label);
25810 fputs (":\n", file);
25811 if (flag_pic)
25813 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25814 rtx tem = XEXP (DECL_RTL (function), 0);
25815 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25816 pipeline offset is four rather than eight. Adjust the offset
25817 accordingly. */
25818 tem = plus_constant (GET_MODE (tem), tem,
25819 TARGET_THUMB1_ONLY ? -3 : -7);
25820 tem = gen_rtx_MINUS (GET_MODE (tem),
25821 tem,
25822 gen_rtx_SYMBOL_REF (Pmode,
25823 ggc_strdup (labelpc)));
25824 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25826 else
25827 /* Output ".word .LTHUNKn". */
25828 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25830 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25831 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25833 else
25835 fputs ("\tb\t", file);
25836 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25837 if (NEED_PLT_RELOC)
25838 fputs ("(PLT)", file);
25839 fputc ('\n', file);
25842 final_end_function ();
25846 arm_emit_vector_const (FILE *file, rtx x)
25848 int i;
25849 const char * pattern;
25851 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25853 switch (GET_MODE (x))
25855 case V2SImode: pattern = "%08x"; break;
25856 case V4HImode: pattern = "%04x"; break;
25857 case V8QImode: pattern = "%02x"; break;
25858 default: gcc_unreachable ();
25861 fprintf (file, "0x");
25862 for (i = CONST_VECTOR_NUNITS (x); i--;)
25864 rtx element;
25866 element = CONST_VECTOR_ELT (x, i);
25867 fprintf (file, pattern, INTVAL (element));
25870 return 1;
25873 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25874 HFmode constant pool entries are actually loaded with ldr. */
25875 void
25876 arm_emit_fp16_const (rtx c)
25878 REAL_VALUE_TYPE r;
25879 long bits;
25881 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25882 bits = real_to_target (NULL, &r, HFmode);
25883 if (WORDS_BIG_ENDIAN)
25884 assemble_zeros (2);
25885 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25886 if (!WORDS_BIG_ENDIAN)
25887 assemble_zeros (2);
25890 const char *
25891 arm_output_load_gr (rtx *operands)
25893 rtx reg;
25894 rtx offset;
25895 rtx wcgr;
25896 rtx sum;
25898 if (!MEM_P (operands [1])
25899 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25900 || !REG_P (reg = XEXP (sum, 0))
25901 || !CONST_INT_P (offset = XEXP (sum, 1))
25902 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25903 return "wldrw%?\t%0, %1";
25905 /* Fix up an out-of-range load of a GR register. */
25906 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25907 wcgr = operands[0];
25908 operands[0] = reg;
25909 output_asm_insn ("ldr%?\t%0, %1", operands);
25911 operands[0] = wcgr;
25912 operands[1] = reg;
25913 output_asm_insn ("tmcr%?\t%0, %1", operands);
25914 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25916 return "";
25919 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25921 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25922 named arg and all anonymous args onto the stack.
25923 XXX I know the prologue shouldn't be pushing registers, but it is faster
25924 that way. */
25926 static void
25927 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25928 machine_mode mode,
25929 tree type,
25930 int *pretend_size,
25931 int second_time ATTRIBUTE_UNUSED)
25933 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25934 int nregs;
25936 cfun->machine->uses_anonymous_args = 1;
25937 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25939 nregs = pcum->aapcs_ncrn;
25940 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25941 nregs++;
25943 else
25944 nregs = pcum->nregs;
25946 if (nregs < NUM_ARG_REGS)
25947 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25950 /* We can't rely on the caller doing the proper promotion when
25951 using APCS or ATPCS. */
25953 static bool
25954 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25956 return !TARGET_AAPCS_BASED;
25959 static machine_mode
25960 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25961 machine_mode mode,
25962 int *punsignedp ATTRIBUTE_UNUSED,
25963 const_tree fntype ATTRIBUTE_UNUSED,
25964 int for_return ATTRIBUTE_UNUSED)
25966 if (GET_MODE_CLASS (mode) == MODE_INT
25967 && GET_MODE_SIZE (mode) < 4)
25968 return SImode;
25970 return mode;
25973 /* AAPCS based ABIs use short enums by default. */
25975 static bool
25976 arm_default_short_enums (void)
25978 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25982 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25984 static bool
25985 arm_align_anon_bitfield (void)
25987 return TARGET_AAPCS_BASED;
25991 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25993 static tree
25994 arm_cxx_guard_type (void)
25996 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26000 /* The EABI says test the least significant bit of a guard variable. */
26002 static bool
26003 arm_cxx_guard_mask_bit (void)
26005 return TARGET_AAPCS_BASED;
26009 /* The EABI specifies that all array cookies are 8 bytes long. */
26011 static tree
26012 arm_get_cookie_size (tree type)
26014 tree size;
26016 if (!TARGET_AAPCS_BASED)
26017 return default_cxx_get_cookie_size (type);
26019 size = build_int_cst (sizetype, 8);
26020 return size;
26024 /* The EABI says that array cookies should also contain the element size. */
26026 static bool
26027 arm_cookie_has_size (void)
26029 return TARGET_AAPCS_BASED;
26033 /* The EABI says constructors and destructors should return a pointer to
26034 the object constructed/destroyed. */
26036 static bool
26037 arm_cxx_cdtor_returns_this (void)
26039 return TARGET_AAPCS_BASED;
26042 /* The EABI says that an inline function may never be the key
26043 method. */
26045 static bool
26046 arm_cxx_key_method_may_be_inline (void)
26048 return !TARGET_AAPCS_BASED;
26051 static void
26052 arm_cxx_determine_class_data_visibility (tree decl)
26054 if (!TARGET_AAPCS_BASED
26055 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26056 return;
26058 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26059 is exported. However, on systems without dynamic vague linkage,
26060 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26061 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26062 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26063 else
26064 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26065 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26068 static bool
26069 arm_cxx_class_data_always_comdat (void)
26071 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26072 vague linkage if the class has no key function. */
26073 return !TARGET_AAPCS_BASED;
26077 /* The EABI says __aeabi_atexit should be used to register static
26078 destructors. */
26080 static bool
26081 arm_cxx_use_aeabi_atexit (void)
26083 return TARGET_AAPCS_BASED;
26087 void
26088 arm_set_return_address (rtx source, rtx scratch)
26090 arm_stack_offsets *offsets;
26091 HOST_WIDE_INT delta;
26092 rtx addr;
26093 unsigned long saved_regs;
26095 offsets = arm_get_frame_offsets ();
26096 saved_regs = offsets->saved_regs_mask;
26098 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26099 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26100 else
26102 if (frame_pointer_needed)
26103 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26104 else
26106 /* LR will be the first saved register. */
26107 delta = offsets->outgoing_args - (offsets->frame + 4);
26110 if (delta >= 4096)
26112 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26113 GEN_INT (delta & ~4095)));
26114 addr = scratch;
26115 delta &= 4095;
26117 else
26118 addr = stack_pointer_rtx;
26120 addr = plus_constant (Pmode, addr, delta);
26122 /* The store needs to be marked as frame related in order to prevent
26123 DSE from deleting it as dead if it is based on fp. */
26124 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26125 RTX_FRAME_RELATED_P (insn) = 1;
26126 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26131 void
26132 thumb_set_return_address (rtx source, rtx scratch)
26134 arm_stack_offsets *offsets;
26135 HOST_WIDE_INT delta;
26136 HOST_WIDE_INT limit;
26137 int reg;
26138 rtx addr;
26139 unsigned long mask;
26141 emit_use (source);
26143 offsets = arm_get_frame_offsets ();
26144 mask = offsets->saved_regs_mask;
26145 if (mask & (1 << LR_REGNUM))
26147 limit = 1024;
26148 /* Find the saved regs. */
26149 if (frame_pointer_needed)
26151 delta = offsets->soft_frame - offsets->saved_args;
26152 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26153 if (TARGET_THUMB1)
26154 limit = 128;
26156 else
26158 delta = offsets->outgoing_args - offsets->saved_args;
26159 reg = SP_REGNUM;
26161 /* Allow for the stack frame. */
26162 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26163 delta -= 16;
26164 /* The link register is always the first saved register. */
26165 delta -= 4;
26167 /* Construct the address. */
26168 addr = gen_rtx_REG (SImode, reg);
26169 if (delta > limit)
26171 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26172 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26173 addr = scratch;
26175 else
26176 addr = plus_constant (Pmode, addr, delta);
26178 /* The store needs to be marked as frame related in order to prevent
26179 DSE from deleting it as dead if it is based on fp. */
26180 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26181 RTX_FRAME_RELATED_P (insn) = 1;
26182 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26184 else
26185 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26188 /* Implements target hook vector_mode_supported_p. */
26189 bool
26190 arm_vector_mode_supported_p (machine_mode mode)
26192 /* Neon also supports V2SImode, etc. listed in the clause below. */
26193 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26194 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26195 return true;
26197 if ((TARGET_NEON || TARGET_IWMMXT)
26198 && ((mode == V2SImode)
26199 || (mode == V4HImode)
26200 || (mode == V8QImode)))
26201 return true;
26203 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26204 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26205 || mode == V2HAmode))
26206 return true;
26208 return false;
26211 /* Implements target hook array_mode_supported_p. */
26213 static bool
26214 arm_array_mode_supported_p (machine_mode mode,
26215 unsigned HOST_WIDE_INT nelems)
26217 if (TARGET_NEON
26218 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26219 && (nelems >= 2 && nelems <= 4))
26220 return true;
26222 return false;
26225 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26226 registers when autovectorizing for Neon, at least until multiple vector
26227 widths are supported properly by the middle-end. */
26229 static machine_mode
26230 arm_preferred_simd_mode (machine_mode mode)
26232 if (TARGET_NEON)
26233 switch (mode)
26235 case SFmode:
26236 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26237 case SImode:
26238 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26239 case HImode:
26240 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26241 case QImode:
26242 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26243 case DImode:
26244 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26245 return V2DImode;
26246 break;
26248 default:;
26251 if (TARGET_REALLY_IWMMXT)
26252 switch (mode)
26254 case SImode:
26255 return V2SImode;
26256 case HImode:
26257 return V4HImode;
26258 case QImode:
26259 return V8QImode;
26261 default:;
26264 return word_mode;
26267 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26269 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26270 using r0-r4 for function arguments, r7 for the stack frame and don't have
26271 enough left over to do doubleword arithmetic. For Thumb-2 all the
26272 potentially problematic instructions accept high registers so this is not
26273 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26274 that require many low registers. */
26275 static bool
26276 arm_class_likely_spilled_p (reg_class_t rclass)
26278 if ((TARGET_THUMB1 && rclass == LO_REGS)
26279 || rclass == CC_REG)
26280 return true;
26282 return false;
26285 /* Implements target hook small_register_classes_for_mode_p. */
26286 bool
26287 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26289 return TARGET_THUMB1;
26292 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26293 ARM insns and therefore guarantee that the shift count is modulo 256.
26294 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26295 guarantee no particular behavior for out-of-range counts. */
26297 static unsigned HOST_WIDE_INT
26298 arm_shift_truncation_mask (machine_mode mode)
26300 return mode == SImode ? 255 : 0;
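/* For instance, an SImode shift whose (variable) count turns out to be 257
   behaves as a shift by 257 & 255 == 1, so the middle-end may omit an
   explicit masking operation; no such guarantee exists for DImode.  */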
26304 /* Map internal gcc register numbers to DWARF2 register numbers. */
26306 unsigned int
26307 arm_dbx_register_number (unsigned int regno)
26309 if (regno < 16)
26310 return regno;
26312 if (IS_VFP_REGNUM (regno))
26314 /* See comment in arm_dwarf_register_span. */
26315 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26316 return 64 + regno - FIRST_VFP_REGNUM;
26317 else
26318 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26321 if (IS_IWMMXT_GR_REGNUM (regno))
26322 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26324 if (IS_IWMMXT_REGNUM (regno))
26325 return 112 + regno - FIRST_IWMMXT_REGNUM;
26327 gcc_unreachable ();
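/* A few sample mappings under this scheme, assuming the usual register
   layout: r11 stays 11, s2 becomes 66 (64 + 2) and d20 becomes
   276 (256 + 20).  */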
26330 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26331 GCC models them as 64 32-bit registers, so we need to describe this to
26332 the DWARF generation code. Other registers can use the default. */
26333 static rtx
26334 arm_dwarf_register_span (rtx rtl)
26336 machine_mode mode;
26337 unsigned regno;
26338 rtx parts[16];
26339 int nregs;
26340 int i;
26342 regno = REGNO (rtl);
26343 if (!IS_VFP_REGNUM (regno))
26344 return NULL_RTX;
26346 /* XXX FIXME: The EABI defines two VFP register ranges:
26347 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26348 256-287: D0-D31
26349 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26350 corresponding D register. Until GDB supports this, we shall use the
26351 legacy encodings. We also use these encodings for D0-D15 for
26352 compatibility with older debuggers. */
26353 mode = GET_MODE (rtl);
26354 if (GET_MODE_SIZE (mode) < 8)
26355 return NULL_RTX;
26357 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26359 nregs = GET_MODE_SIZE (mode) / 4;
26360 for (i = 0; i < nregs; i += 2)
26361 if (TARGET_BIG_END)
26363 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26364 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26366 else
26368 parts[i] = gen_rtx_REG (SImode, regno + i);
26369 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26372 else
26374 nregs = GET_MODE_SIZE (mode) / 8;
26375 for (i = 0; i < nregs; i++)
26376 parts[i] = gen_rtx_REG (DImode, regno + i);
26379 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26382 #if ARM_UNWIND_INFO
26383 /* Emit unwind directives for a store-multiple instruction or stack pointer
26384 push during alignment.
26385 These should only ever be generated by the function prologue code, so
26386 expect them to have a particular form.
26387 The store-multiple instruction sometimes pushes pc as the last register,
26388 although it should not be tracked into unwind information, or for -Os
26389 sometimes pushes some dummy registers before the first register that needs
26390 to be tracked in unwind information; such dummy registers are there just
26391 to avoid separate stack adjustment, and will not be restored in the
26392 epilogue. */
26394 static void
26395 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26397 int i;
26398 HOST_WIDE_INT offset;
26399 HOST_WIDE_INT nregs;
26400 int reg_size;
26401 unsigned reg;
26402 unsigned lastreg;
26403 unsigned padfirst = 0, padlast = 0;
26404 rtx e;
26406 e = XVECEXP (p, 0, 0);
26407 gcc_assert (GET_CODE (e) == SET);
26409 /* First insn will adjust the stack pointer. */
26410 gcc_assert (GET_CODE (e) == SET
26411 && REG_P (SET_DEST (e))
26412 && REGNO (SET_DEST (e)) == SP_REGNUM
26413 && GET_CODE (SET_SRC (e)) == PLUS);
26415 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26416 nregs = XVECLEN (p, 0) - 1;
26417 gcc_assert (nregs);
26419 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26420 if (reg < 16)
26422 /* For -Os dummy registers can be pushed at the beginning to
26423 avoid separate stack pointer adjustment. */
26424 e = XVECEXP (p, 0, 1);
26425 e = XEXP (SET_DEST (e), 0);
26426 if (GET_CODE (e) == PLUS)
26427 padfirst = INTVAL (XEXP (e, 1));
26428 gcc_assert (padfirst == 0 || optimize_size);
26429 /* The function prologue may also push pc, but not annotate it as it is
26430 never restored. We turn this into a stack pointer adjustment. */
26431 e = XVECEXP (p, 0, nregs);
26432 e = XEXP (SET_DEST (e), 0);
26433 if (GET_CODE (e) == PLUS)
26434 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26435 else
26436 padlast = offset - 4;
26437 gcc_assert (padlast == 0 || padlast == 4);
26438 if (padlast == 4)
26439 fprintf (asm_out_file, "\t.pad #4\n");
26440 reg_size = 4;
26441 fprintf (asm_out_file, "\t.save {");
26443 else if (IS_VFP_REGNUM (reg))
26445 reg_size = 8;
26446 fprintf (asm_out_file, "\t.vsave {");
26448 else
26449 /* Unknown register type. */
26450 gcc_unreachable ();
26452 /* If the stack increment doesn't match the size of the saved registers,
26453 something has gone horribly wrong. */
26454 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26456 offset = padfirst;
26457 lastreg = 0;
26458 /* The remaining insns will describe the stores. */
26459 for (i = 1; i <= nregs; i++)
26461 /* Expect (set (mem <addr>) (reg)).
26462 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26463 e = XVECEXP (p, 0, i);
26464 gcc_assert (GET_CODE (e) == SET
26465 && MEM_P (SET_DEST (e))
26466 && REG_P (SET_SRC (e)));
26468 reg = REGNO (SET_SRC (e));
26469 gcc_assert (reg >= lastreg);
26471 if (i != 1)
26472 fprintf (asm_out_file, ", ");
26473 /* We can't use %r for vfp because we need to use the
26474 double precision register names. */
26475 if (IS_VFP_REGNUM (reg))
26476 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26477 else
26478 asm_fprintf (asm_out_file, "%r", reg);
26480 #ifdef ENABLE_CHECKING
26481 /* Check that the addresses are consecutive. */
26482 e = XEXP (SET_DEST (e), 0);
26483 if (GET_CODE (e) == PLUS)
26484 gcc_assert (REG_P (XEXP (e, 0))
26485 && REGNO (XEXP (e, 0)) == SP_REGNUM
26486 && CONST_INT_P (XEXP (e, 1))
26487 && offset == INTVAL (XEXP (e, 1)));
26488 else
26489 gcc_assert (i == 1
26490 && REG_P (e)
26491 && REGNO (e) == SP_REGNUM);
26492 offset += reg_size;
26493 #endif
26495 fprintf (asm_out_file, "}\n");
26496 if (padfirst)
26497 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
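/* As an example of the output, a prologue store-multiple equivalent to
   push {r4, r5, lr} should produce roughly

	.save {r4, r5, lr}

   while a trailing dummy push of pc shows up as an extra ".pad #4"
   emitted before the register list, and a leading -Os dummy push as a
   ".pad" emitted after it.  */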
26500 /* Emit unwind directives for a SET. */
26502 static void
26503 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26505 rtx e0;
26506 rtx e1;
26507 unsigned reg;
26509 e0 = XEXP (p, 0);
26510 e1 = XEXP (p, 1);
26511 switch (GET_CODE (e0))
26513 case MEM:
26514 /* Pushing a single register. */
26515 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26516 || !REG_P (XEXP (XEXP (e0, 0), 0))
26517 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26518 abort ();
26520 asm_fprintf (asm_out_file, "\t.save ");
26521 if (IS_VFP_REGNUM (REGNO (e1)))
26522 asm_fprintf(asm_out_file, "{d%d}\n",
26523 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26524 else
26525 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26526 break;
26528 case REG:
26529 if (REGNO (e0) == SP_REGNUM)
26531 /* A stack increment. */
26532 if (GET_CODE (e1) != PLUS
26533 || !REG_P (XEXP (e1, 0))
26534 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26535 || !CONST_INT_P (XEXP (e1, 1)))
26536 abort ();
26538 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26539 -INTVAL (XEXP (e1, 1)));
26541 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26543 HOST_WIDE_INT offset;
26545 if (GET_CODE (e1) == PLUS)
26547 if (!REG_P (XEXP (e1, 0))
26548 || !CONST_INT_P (XEXP (e1, 1)))
26549 abort ();
26550 reg = REGNO (XEXP (e1, 0));
26551 offset = INTVAL (XEXP (e1, 1));
26552 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26553 HARD_FRAME_POINTER_REGNUM, reg,
26554 offset);
26556 else if (REG_P (e1))
26558 reg = REGNO (e1);
26559 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26560 HARD_FRAME_POINTER_REGNUM, reg);
26562 else
26563 abort ();
26565 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26567 /* Move from sp to reg. */
26568 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26570 else if (GET_CODE (e1) == PLUS
26571 && REG_P (XEXP (e1, 0))
26572 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26573 && CONST_INT_P (XEXP (e1, 1)))
26575 /* Set reg to offset from sp. */
26576 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26577 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26579 else
26580 abort ();
26581 break;
26583 default:
26584 abort ();
26589 /* Emit unwind directives for the given insn. */
26591 static void
26592 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26594 rtx note, pat;
26595 bool handled_one = false;
26597 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26598 return;
26600 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26601 && (TREE_NOTHROW (current_function_decl)
26602 || crtl->all_throwers_are_sibcalls))
26603 return;
26605 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26606 return;
26608 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26610 switch (REG_NOTE_KIND (note))
26612 case REG_FRAME_RELATED_EXPR:
26613 pat = XEXP (note, 0);
26614 goto found;
26616 case REG_CFA_REGISTER:
26617 pat = XEXP (note, 0);
26618 if (pat == NULL)
26620 pat = PATTERN (insn);
26621 if (GET_CODE (pat) == PARALLEL)
26622 pat = XVECEXP (pat, 0, 0);
26625 /* Only emitted for IS_STACKALIGN re-alignment. */
26627 rtx dest, src;
26628 unsigned reg;
26630 src = SET_SRC (pat);
26631 dest = SET_DEST (pat);
26633 gcc_assert (src == stack_pointer_rtx);
26634 reg = REGNO (dest);
26635 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26636 reg + 0x90, reg);
26638 handled_one = true;
26639 break;
26641 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
26642 to get correct dwarf information for shrink-wrap. We should not
26643 emit unwind information for it because these are used either for
26644 pretend arguments or notes to adjust sp and restore registers from
26645 stack. */
26646 case REG_CFA_DEF_CFA:
26647 case REG_CFA_ADJUST_CFA:
26648 case REG_CFA_RESTORE:
26649 return;
26651 case REG_CFA_EXPRESSION:
26652 case REG_CFA_OFFSET:
26653 /* ??? Only handling here what we actually emit. */
26654 gcc_unreachable ();
26656 default:
26657 break;
26660 if (handled_one)
26661 return;
26662 pat = PATTERN (insn);
26663 found:
26665 switch (GET_CODE (pat))
26667 case SET:
26668 arm_unwind_emit_set (asm_out_file, pat);
26669 break;
26671 case SEQUENCE:
26672 /* Store multiple. */
26673 arm_unwind_emit_sequence (asm_out_file, pat);
26674 break;
26676 default:
26677 abort();
26682 /* Output a reference from a function exception table to the type_info
26683 object X. The EABI specifies that the symbol should be relocated by
26684 an R_ARM_TARGET2 relocation. */
26686 static bool
26687 arm_output_ttype (rtx x)
26689 fputs ("\t.word\t", asm_out_file);
26690 output_addr_const (asm_out_file, x);
26691 /* Use special relocations for symbol references. */
26692 if (!CONST_INT_P (x))
26693 fputs ("(TARGET2)", asm_out_file);
26694 fputc ('\n', asm_out_file);
26696 return TRUE;
26699 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26701 static void
26702 arm_asm_emit_except_personality (rtx personality)
26704 fputs ("\t.personality\t", asm_out_file);
26705 output_addr_const (asm_out_file, personality);
26706 fputc ('\n', asm_out_file);
26709 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26711 static void
26712 arm_asm_init_sections (void)
26714 exception_section = get_unnamed_section (0, output_section_asm_op,
26715 "\t.handlerdata");
26717 #endif /* ARM_UNWIND_INFO */
26719 /* Output unwind directives for the start/end of a function. */
26721 void
26722 arm_output_fn_unwind (FILE * f, bool prologue)
26724 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26725 return;
26727 if (prologue)
26728 fputs ("\t.fnstart\n", f);
26729 else
26731 /* If this function will never be unwound, then mark it as such.
26732 The same condition is used in arm_unwind_emit to suppress
26733 the frame annotations. */
26734 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26735 && (TREE_NOTHROW (current_function_decl)
26736 || crtl->all_throwers_are_sibcalls))
26737 fputs("\t.cantunwind\n", f);
26739 fputs ("\t.fnend\n", f);
26743 static bool
26744 arm_emit_tls_decoration (FILE *fp, rtx x)
26746 enum tls_reloc reloc;
26747 rtx val;
26749 val = XVECEXP (x, 0, 0);
26750 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26752 output_addr_const (fp, val);
26754 switch (reloc)
26756 case TLS_GD32:
26757 fputs ("(tlsgd)", fp);
26758 break;
26759 case TLS_LDM32:
26760 fputs ("(tlsldm)", fp);
26761 break;
26762 case TLS_LDO32:
26763 fputs ("(tlsldo)", fp);
26764 break;
26765 case TLS_IE32:
26766 fputs ("(gottpoff)", fp);
26767 break;
26768 case TLS_LE32:
26769 fputs ("(tpoff)", fp);
26770 break;
26771 case TLS_DESCSEQ:
26772 fputs ("(tlsdesc)", fp);
26773 break;
26774 default:
26775 gcc_unreachable ();
26778 switch (reloc)
26780 case TLS_GD32:
26781 case TLS_LDM32:
26782 case TLS_IE32:
26783 case TLS_DESCSEQ:
26784 fputs (" + (. - ", fp);
26785 output_addr_const (fp, XVECEXP (x, 0, 2));
26786 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
26787 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26788 output_addr_const (fp, XVECEXP (x, 0, 3));
26789 fputc (')', fp);
26790 break;
26791 default:
26792 break;
26795 return TRUE;
26798 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26800 static void
26801 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26803 gcc_assert (size == 4);
26804 fputs ("\t.word\t", file);
26805 output_addr_const (file, x);
26806 fputs ("(tlsldo)", file);
26809 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26811 static bool
26812 arm_output_addr_const_extra (FILE *fp, rtx x)
26814 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26815 return arm_emit_tls_decoration (fp, x);
26816 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26818 char label[256];
26819 int labelno = INTVAL (XVECEXP (x, 0, 0));
26821 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26822 assemble_name_raw (fp, label);
26824 return TRUE;
26826 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26828 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26829 if (GOT_PCREL)
26830 fputs ("+.", fp);
26831 fputs ("-(", fp);
26832 output_addr_const (fp, XVECEXP (x, 0, 0));
26833 fputc (')', fp);
26834 return TRUE;
26836 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26838 output_addr_const (fp, XVECEXP (x, 0, 0));
26839 if (GOT_PCREL)
26840 fputs ("+.", fp);
26841 fputs ("-(", fp);
26842 output_addr_const (fp, XVECEXP (x, 0, 1));
26843 fputc (')', fp);
26844 return TRUE;
26846 else if (GET_CODE (x) == CONST_VECTOR)
26847 return arm_emit_vector_const (fp, x);
26849 return FALSE;
26852 /* Output assembly for a shift instruction.
26853 SET_FLAGS determines how the instruction modifies the condition codes.
26854 0 - Do not set condition codes.
26855 1 - Set condition codes.
26856 2 - Use smallest instruction. */
26857 const char *
26858 arm_output_shift(rtx * operands, int set_flags)
26860 char pattern[100];
26861 static const char flag_chars[3] = {'?', '.', '!'};
26862 const char *shift;
26863 HOST_WIDE_INT val;
26864 char c;
26866 c = flag_chars[set_flags];
26867 if (TARGET_UNIFIED_ASM)
26869 shift = shift_op(operands[3], &val);
26870 if (shift)
26872 if (val != -1)
26873 operands[2] = GEN_INT(val);
26874 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26876 else
26877 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26879 else
26880 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26881 output_asm_insn (pattern, operands);
26882 return "";
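/* As a worked example (operand values assumed): with unified syntax,
   set_flags == 0 and a shift rtx that shift_op describes as "lsl" by a
   constant 3, the sprintf above builds the template
       "lsl%?\t%0, %1, %2"
   with operands[2] set to (const_int 3) before it is passed to
   output_asm_insn.  */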
26885 /* Output assembly for a WMMX immediate shift instruction. */
26886 const char *
26887 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26889 int shift = INTVAL (operands[2]);
26890 char templ[50];
26891 machine_mode opmode = GET_MODE (operands[0]);
26893 gcc_assert (shift >= 0);
26895 /* Handle out-of-range shift values: greater than 63 for the D qualifier,
26896 31 for the W qualifier or 15 for the H qualifier. */
26897 if (((opmode == V4HImode) && (shift > 15))
26898 || ((opmode == V2SImode) && (shift > 31))
26899 || ((opmode == DImode) && (shift > 63)))
26901 if (wror_or_wsra)
26903 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26904 output_asm_insn (templ, operands);
26905 if (opmode == DImode)
26907 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26908 output_asm_insn (templ, operands);
26911 else
26913 /* The destination register will contain all zeros. */
26914 sprintf (templ, "wzero\t%%0");
26915 output_asm_insn (templ, operands);
26917 return "";
26920 if ((opmode == DImode) && (shift > 32))
26922 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26923 output_asm_insn (templ, operands);
26924 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26925 output_asm_insn (templ, operands);
26927 else
26929 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26930 output_asm_insn (templ, operands);
26932 return "";
26935 /* Output assembly for a WMMX tinsr instruction. */
26936 const char *
26937 arm_output_iwmmxt_tinsr (rtx *operands)
26939 int mask = INTVAL (operands[3]);
26940 int i;
26941 char templ[50];
26942 int units = mode_nunits[GET_MODE (operands[0])];
26943 gcc_assert ((mask & (mask - 1)) == 0);
26944 for (i = 0; i < units; ++i)
26946 if ((mask & 0x01) == 1)
26948 break;
26950 mask >>= 1;
26952 gcc_assert (i < units);
26954 switch (GET_MODE (operands[0]))
26956 case V8QImode:
26957 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26958 break;
26959 case V4HImode:
26960 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26961 break;
26962 case V2SImode:
26963 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26964 break;
26965 default:
26966 gcc_unreachable ();
26967 break;
26969 output_asm_insn (templ, operands);
26971 return "";
26974 /* Output a Thumb-1 casesi dispatch sequence. */
26975 const char *
26976 thumb1_output_casesi (rtx *operands)
26978 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
26980 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26982 switch (GET_MODE(diff_vec))
26984 case QImode:
26985 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26986 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26987 case HImode:
26988 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26989 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26990 case SImode:
26991 return "bl\t%___gnu_thumb1_case_si";
26992 default:
26993 gcc_unreachable ();
26997 /* Output a Thumb-2 casesi instruction. */
26998 const char *
26999 thumb2_output_casesi (rtx *operands)
27001 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27003 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27005 output_asm_insn ("cmp\t%0, %1", operands);
27006 output_asm_insn ("bhi\t%l3", operands);
27007 switch (GET_MODE(diff_vec))
27009 case QImode:
27010 return "tbb\t[%|pc, %0]";
27011 case HImode:
27012 return "tbh\t[%|pc, %0, lsl #1]";
27013 case SImode:
27014 if (flag_pic)
27016 output_asm_insn ("adr\t%4, %l2", operands);
27017 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27018 output_asm_insn ("add\t%4, %4, %5", operands);
27019 return "bx\t%4";
27021 else
27023 output_asm_insn ("adr\t%4, %l2", operands);
27024 return "ldr\t%|pc, [%4, %0, lsl #2]";
27026 default:
27027 gcc_unreachable ();
27031 /* Most ARM cores are single issue, but some newer ones can dual issue.
27032 The scheduler descriptions rely on this being correct. */
27033 static int
27034 arm_issue_rate (void)
27036 switch (arm_tune)
27038 case cortexa15:
27039 case cortexa57:
27040 return 3;
27042 case cortexm7:
27043 case cortexr4:
27044 case cortexr4f:
27045 case cortexr5:
27046 case genericv7a:
27047 case cortexa5:
27048 case cortexa7:
27049 case cortexa8:
27050 case cortexa9:
27051 case cortexa12:
27052 case cortexa17:
27053 case cortexa53:
27054 case fa726te:
27055 case marvell_pj4:
27056 return 2;
27058 default:
27059 return 1;
27063 const char *
27064 arm_mangle_type (const_tree type)
27066 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27067 has to be mangled as if it is in the "std" namespace. */
27068 if (TARGET_AAPCS_BASED
27069 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27070 return "St9__va_list";
27072 /* Half-precision float. */
27073 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27074 return "Dh";
27076 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27077 builtin type. */
27078 if (TYPE_NAME (type) != NULL)
27079 return arm_mangle_builtin_type (type);
27081 /* Use the default mangling. */
27082 return NULL;
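/* For instance, under an AAPCS-based target a declaration such as
       void f (__builtin_va_list);
   is expected to mangle as _Z1fSt9__va_list, and one taking __fp16 as
   _Z1fDh, matching the two special cases above.  */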
27085 /* Order of allocation of core registers for Thumb: this allocation is
27086 written over the corresponding initial entries of the array
27087 initialized with REG_ALLOC_ORDER. We allocate all low registers
27088 first. Saving and restoring a low register is usually cheaper than
27089 using a call-clobbered high register. */
27091 static const int thumb_core_reg_alloc_order[] =
27093 3, 2, 1, 0, 4, 5, 6, 7,
27094 14, 12, 8, 9, 10, 11
27097 /* Adjust register allocation order when compiling for Thumb. */
27099 void
27100 arm_order_regs_for_local_alloc (void)
27102 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27103 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27104 if (TARGET_THUMB)
27105 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27106 sizeof (thumb_core_reg_alloc_order));
27109 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27111 bool
27112 arm_frame_pointer_required (void)
27114 return (cfun->has_nonlocal_label
27115 || SUBTARGET_FRAME_POINTER_REQUIRED
27116 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27119 /* Only thumb1 can't support conditional execution, so return true if
27120 the target is not thumb1. */
27121 static bool
27122 arm_have_conditional_execution (void)
27124 return !TARGET_THUMB1;
27127 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27128 static HOST_WIDE_INT
27129 arm_vector_alignment (const_tree type)
27131 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27133 if (TARGET_AAPCS_BASED)
27134 align = MIN (align, 64);
27136 return align;
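/* So, for example, a 128-bit NEON vector type such as int32x4_t, which
   would naturally be 128-bit aligned, is given only 64-bit alignment
   when TARGET_AAPCS_BASED.  */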
27139 static unsigned int
27140 arm_autovectorize_vector_sizes (void)
27142 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27145 static bool
27146 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27148 /* Vectors which aren't in packed structures will not be less aligned than
27149 the natural alignment of their element type, so this is safe. */
27150 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27151 return !is_packed;
27153 return default_builtin_vector_alignment_reachable (type, is_packed);
27156 static bool
27157 arm_builtin_support_vector_misalignment (machine_mode mode,
27158 const_tree type, int misalignment,
27159 bool is_packed)
27161 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27163 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27165 if (is_packed)
27166 return align == 1;
27168 /* If the misalignment is unknown, we should be able to handle the access
27169 so long as it is not to a member of a packed data structure. */
27170 if (misalignment == -1)
27171 return true;
27173 /* Return true if the misalignment is a multiple of the natural alignment
27174 of the vector's element type. This is probably always going to be
27175 true in practice, since we've already established that this isn't a
27176 packed access. */
27177 return ((misalignment % align) == 0);
27180 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27181 is_packed);
27184 static void
27185 arm_conditional_register_usage (void)
27187 int regno;
27189 if (TARGET_THUMB1 && optimize_size)
27191 /* When optimizing for size on Thumb-1, it's better not
27192 to use the HI regs, because of the overhead of
27193 stacking them. */
27194 for (regno = FIRST_HI_REGNUM;
27195 regno <= LAST_HI_REGNUM; ++regno)
27196 fixed_regs[regno] = call_used_regs[regno] = 1;
27199 /* The link register can be clobbered by any branch insn,
27200 but we have no way to track that at present, so mark
27201 it as unavailable. */
27202 if (TARGET_THUMB1)
27203 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27205 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27207 /* VFPv3 registers are disabled when earlier VFP
27208 versions are selected due to the definition of
27209 LAST_VFP_REGNUM. */
27210 for (regno = FIRST_VFP_REGNUM;
27211 regno <= LAST_VFP_REGNUM; ++ regno)
27213 fixed_regs[regno] = 0;
27214 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27215 || regno >= FIRST_VFP_REGNUM + 32;
27219 if (TARGET_REALLY_IWMMXT)
27221 regno = FIRST_IWMMXT_GR_REGNUM;
27222 /* The 2002/10/09 revision of the XScale ABI has wCG0
27223 and wCG1 as call-preserved registers. The 2002/11/21
27224 revision changed this so that all wCG registers are
27225 scratch registers. */
27226 for (regno = FIRST_IWMMXT_GR_REGNUM;
27227 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27228 fixed_regs[regno] = 0;
27229 /* The XScale ABI has wR0 - wR9 as scratch registers,
27230 the rest as call-preserved registers. */
27231 for (regno = FIRST_IWMMXT_REGNUM;
27232 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27234 fixed_regs[regno] = 0;
27235 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27239 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27241 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27242 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27244 else if (TARGET_APCS_STACK)
27246 fixed_regs[10] = 1;
27247 call_used_regs[10] = 1;
27249 /* -mcaller-super-interworking reserves r11 for calls to
27250 _interwork_r11_call_via_rN(). Making the register global
27251 is an easy way of ensuring that it remains valid for all
27252 calls. */
27253 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27254 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27256 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27257 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27258 if (TARGET_CALLER_INTERWORKING)
27259 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27261 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27264 static reg_class_t
27265 arm_preferred_rename_class (reg_class_t rclass)
27267 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27268 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
27269 and code size can be reduced. */
27270 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27271 return LO_REGS;
27272 else
27273 return NO_REGS;
27276 /* Compute the attribute "length" of insn "*push_multi".
27277 So this function MUST be kept in sync with that insn pattern. */
27279 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27281 int i, regno, hi_reg;
27282 int num_saves = XVECLEN (parallel_op, 0);
27284 /* ARM mode. */
27285 if (TARGET_ARM)
27286 return 4;
27287 /* Thumb1 mode. */
27288 if (TARGET_THUMB1)
27289 return 2;
27291 /* Thumb2 mode. */
27292 regno = REGNO (first_op);
27293 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27294 for (i = 1; i < num_saves && !hi_reg; i++)
27296 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27297 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27300 if (!hi_reg)
27301 return 2;
27302 return 4;
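/* For instance, in Thumb-2 a push {r4, r5, lr} can use the 16-bit
   encoding, so the length returned is 2, whereas push {r4, r8} involves
   a high register other than LR and therefore needs the 32-bit encoding
   (length 4).  */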
27305 /* Compute the number of instructions emitted by output_move_double. */
27307 arm_count_output_move_double_insns (rtx *operands)
27309 int count;
27310 rtx ops[2];
27311 /* output_move_double may modify the operands array, so call it
27312 here on a copy of the array. */
27313 ops[0] = operands[0];
27314 ops[1] = operands[1];
27315 output_move_double (ops, false, &count);
27316 return count;
27320 vfp3_const_double_for_fract_bits (rtx operand)
27322 REAL_VALUE_TYPE r0;
27324 if (!CONST_DOUBLE_P (operand))
27325 return 0;
27327 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27328 if (exact_real_inverse (DFmode, &r0))
27330 if (exact_real_truncate (DFmode, &r0))
27332 HOST_WIDE_INT value = real_to_integer (&r0);
27333 value = value & 0xffffffff;
27334 if ((value != 0) && ( (value & (value - 1)) == 0))
27335 return int_log2 (value);
27338 return 0;
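/* For example, a CONST_DOUBLE of 0.125 has the exact inverse 8.0, which
   truncates exactly to the power of two 8, so the function returns 3
   (the number of fractional bits).  */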
27342 vfp3_const_double_for_bits (rtx operand)
27344 REAL_VALUE_TYPE r0;
27346 if (!CONST_DOUBLE_P (operand))
27347 return 0;
27349 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27350 if (exact_real_truncate (DFmode, &r0))
27352 HOST_WIDE_INT value = real_to_integer (&r0);
27353 value = value & 0xffffffff;
27354 if ((value != 0) && ( (value & (value - 1)) == 0))
27355 return int_log2 (value);
27358 return 0;
27361 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27363 static void
27364 arm_pre_atomic_barrier (enum memmodel model)
27366 if (need_atomic_barrier_p (model, true))
27367 emit_insn (gen_memory_barrier ());
27370 static void
27371 arm_post_atomic_barrier (enum memmodel model)
27373 if (need_atomic_barrier_p (model, false))
27374 emit_insn (gen_memory_barrier ());
27377 /* Emit the load-exclusive and store-exclusive instructions.
27378 Use acquire and release versions if necessary. */
27380 static void
27381 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27383 rtx (*gen) (rtx, rtx);
27385 if (acq)
27387 switch (mode)
27389 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27390 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27391 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27392 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27393 default:
27394 gcc_unreachable ();
27397 else
27399 switch (mode)
27401 case QImode: gen = gen_arm_load_exclusiveqi; break;
27402 case HImode: gen = gen_arm_load_exclusivehi; break;
27403 case SImode: gen = gen_arm_load_exclusivesi; break;
27404 case DImode: gen = gen_arm_load_exclusivedi; break;
27405 default:
27406 gcc_unreachable ();
27410 emit_insn (gen (rval, mem));
27413 static void
27414 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27415 rtx mem, bool rel)
27417 rtx (*gen) (rtx, rtx, rtx);
27419 if (rel)
27421 switch (mode)
27423 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27424 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27425 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27426 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27427 default:
27428 gcc_unreachable ();
27431 else
27433 switch (mode)
27435 case QImode: gen = gen_arm_store_exclusiveqi; break;
27436 case HImode: gen = gen_arm_store_exclusivehi; break;
27437 case SImode: gen = gen_arm_store_exclusivesi; break;
27438 case DImode: gen = gen_arm_store_exclusivedi; break;
27439 default:
27440 gcc_unreachable ();
27444 emit_insn (gen (bval, rval, mem));
27447 /* Mark the previous jump instruction as unlikely. */
27449 static void
27450 emit_unlikely_jump (rtx insn)
27452 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27454 insn = emit_jump_insn (insn);
27455 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27458 /* Expand a compare and swap pattern. */
27460 void
27461 arm_expand_compare_and_swap (rtx operands[])
27463 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27464 machine_mode mode;
27465 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27467 bval = operands[0];
27468 rval = operands[1];
27469 mem = operands[2];
27470 oldval = operands[3];
27471 newval = operands[4];
27472 is_weak = operands[5];
27473 mod_s = operands[6];
27474 mod_f = operands[7];
27475 mode = GET_MODE (mem);
27477 /* Normally the succ memory model must be stronger than fail, but in the
27478 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27479 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27481 if (TARGET_HAVE_LDACQ
27482 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27483 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27484 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27486 switch (mode)
27488 case QImode:
27489 case HImode:
27490 /* For narrow modes, we're going to perform the comparison in SImode,
27491 so do the zero-extension now. */
27492 rval = gen_reg_rtx (SImode);
27493 oldval = convert_modes (SImode, mode, oldval, true);
27494 /* FALLTHRU */
27496 case SImode:
27497 /* Force the value into a register if needed. We waited until after
27498 the zero-extension above to do this properly. */
27499 if (!arm_add_operand (oldval, SImode))
27500 oldval = force_reg (SImode, oldval);
27501 break;
27503 case DImode:
27504 if (!cmpdi_operand (oldval, mode))
27505 oldval = force_reg (mode, oldval);
27506 break;
27508 default:
27509 gcc_unreachable ();
27512 switch (mode)
27514 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27515 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27516 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27517 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27518 default:
27519 gcc_unreachable ();
27522 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27524 if (mode == QImode || mode == HImode)
27525 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27527 /* In all cases, we arrange for success to be signaled by Z set.
27528 This arrangement allows for the boolean result to be used directly
27529 in a subsequent branch, post optimization. */
27530 x = gen_rtx_REG (CCmode, CC_REGNUM);
27531 x = gen_rtx_EQ (SImode, x, const0_rtx);
27532 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27535 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27536 another memory store between the load-exclusive and store-exclusive can
27537 reset the monitor from Exclusive to Open state. This means we must wait
27538 until after reload to split the pattern, lest we get a register spill in
27539 the middle of the atomic sequence. */
27541 void
27542 arm_split_compare_and_swap (rtx operands[])
27544 rtx rval, mem, oldval, newval, scratch;
27545 machine_mode mode;
27546 enum memmodel mod_s, mod_f;
27547 bool is_weak;
27548 rtx_code_label *label1, *label2;
27549 rtx x, cond;
27551 rval = operands[0];
27552 mem = operands[1];
27553 oldval = operands[2];
27554 newval = operands[3];
27555 is_weak = (operands[4] != const0_rtx);
27556 mod_s = (enum memmodel) INTVAL (operands[5]);
27557 mod_f = (enum memmodel) INTVAL (operands[6]);
27558 scratch = operands[7];
27559 mode = GET_MODE (mem);
27561 bool use_acquire = TARGET_HAVE_LDACQ
27562 && !(mod_s == MEMMODEL_RELAXED
27563 || mod_s == MEMMODEL_CONSUME
27564 || mod_s == MEMMODEL_RELEASE);
27566 bool use_release = TARGET_HAVE_LDACQ
27567 && !(mod_s == MEMMODEL_RELAXED
27568 || mod_s == MEMMODEL_CONSUME
27569 || mod_s == MEMMODEL_ACQUIRE);
27571 /* Checks whether a barrier is needed and emits one accordingly. */
27572 if (!(use_acquire || use_release))
27573 arm_pre_atomic_barrier (mod_s);
27575 label1 = NULL;
27576 if (!is_weak)
27578 label1 = gen_label_rtx ();
27579 emit_label (label1);
27581 label2 = gen_label_rtx ();
27583 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27585 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27586 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27587 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27588 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27589 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27591 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27593 /* Weak or strong, we want EQ to be true for success, so that we
27594 match the flags that we got from the compare above. */
27595 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27596 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27597 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27599 if (!is_weak)
27601 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27602 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27603 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27604 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27607 if (mod_f != MEMMODEL_RELAXED)
27608 emit_label (label2);
27610 /* Checks whether a barrier is needed and emits one accordingly. */
27611 if (!(use_acquire || use_release))
27612 arm_post_atomic_barrier (mod_s);
27614 if (mod_f == MEMMODEL_RELAXED)
27615 emit_label (label2);
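/* As an illustration (not a guaranteed code sequence - the exact registers,
   labels and barrier placement depend on the memory models and on whether
   LDAEX/STLEX are available), a strong SImode compare-and-swap with SEQ_CST
   ordering on a pre-ARMv8 target splits into roughly:

	dmb
     1:	ldrex	rval, [mem]
	cmp	rval, oldval
	bne	2f
	strex	scratch, newval, [mem]
	cmp	scratch, #0
	bne	1b
     2:	dmb

   with the Z flag on exit indicating success, as described above.  */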
27618 void
27619 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27620 rtx value, rtx model_rtx, rtx cond)
27622 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27623 machine_mode mode = GET_MODE (mem);
27624 machine_mode wmode = (mode == DImode ? DImode : SImode);
27625 rtx_code_label *label;
27626 rtx x;
27628 bool use_acquire = TARGET_HAVE_LDACQ
27629 && !(model == MEMMODEL_RELAXED
27630 || model == MEMMODEL_CONSUME
27631 || model == MEMMODEL_RELEASE);
27633 bool use_release = TARGET_HAVE_LDACQ
27634 && !(model == MEMMODEL_RELAXED
27635 || model == MEMMODEL_CONSUME
27636 || model == MEMMODEL_ACQUIRE);
27638 /* Checks whether a barrier is needed and emits one accordingly. */
27639 if (!(use_acquire || use_release))
27640 arm_pre_atomic_barrier (model);
27642 label = gen_label_rtx ();
27643 emit_label (label);
27645 if (new_out)
27646 new_out = gen_lowpart (wmode, new_out);
27647 if (old_out)
27648 old_out = gen_lowpart (wmode, old_out);
27649 else
27650 old_out = new_out;
27651 value = simplify_gen_subreg (wmode, value, mode, 0);
27653 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27655 switch (code)
27657 case SET:
27658 new_out = value;
27659 break;
27661 case NOT:
27662 x = gen_rtx_AND (wmode, old_out, value);
27663 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27664 x = gen_rtx_NOT (wmode, new_out);
27665 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27666 break;
27668 case MINUS:
27669 if (CONST_INT_P (value))
27671 value = GEN_INT (-INTVAL (value));
27672 code = PLUS;
27674 /* FALLTHRU */
27676 case PLUS:
27677 if (mode == DImode)
27679 /* DImode plus/minus need to clobber flags. */
27680 /* The adddi3 and subdi3 patterns are incorrectly written so that
27681 they require matching operands, even when we could easily support
27682 three operands. Thankfully, this can be fixed up post-splitting,
27683 as the individual add+adc patterns do accept three operands and
27684 post-reload cprop can make these moves go away. */
27685 emit_move_insn (new_out, old_out);
27686 if (code == PLUS)
27687 x = gen_adddi3 (new_out, new_out, value);
27688 else
27689 x = gen_subdi3 (new_out, new_out, value);
27690 emit_insn (x);
27691 break;
27693 /* FALLTHRU */
27695 default:
27696 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27697 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27698 break;
27701 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27702 use_release);
27704 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27705 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27707 /* Checks whether a barrier is needed and emits one accordingly. */
27708 if (!(use_acquire || use_release))
27709 arm_post_atomic_barrier (model);
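/* Note for the function above: QImode and HImode operands are operated on
   as an SImode subword (WMODE), with only the low part written back through
   gen_lowpart, since LDREXB/LDREXH zero-extend into a core register anyway.
   The loop structure mirrors arm_split_compare_and_swap: load-exclusive,
   perform CODE, store-exclusive, and branch back while the store-exclusive
   result is non-zero.  */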
27712 #define MAX_VECT_LEN 16
27714 struct expand_vec_perm_d
27716 rtx target, op0, op1;
27717 unsigned char perm[MAX_VECT_LEN];
27718 machine_mode vmode;
27719 unsigned char nelt;
27720 bool one_vector_p;
27721 bool testing_p;
27724 /* Generate a variable permutation. */
27726 static void
27727 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27729 machine_mode vmode = GET_MODE (target);
27730 bool one_vector_p = rtx_equal_p (op0, op1);
27732 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27733 gcc_checking_assert (GET_MODE (op0) == vmode);
27734 gcc_checking_assert (GET_MODE (op1) == vmode);
27735 gcc_checking_assert (GET_MODE (sel) == vmode);
27736 gcc_checking_assert (TARGET_NEON);
27738 if (one_vector_p)
27740 if (vmode == V8QImode)
27741 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27742 else
27743 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27745 else
27747 rtx pair;
27749 if (vmode == V8QImode)
27751 pair = gen_reg_rtx (V16QImode);
27752 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27753 pair = gen_lowpart (TImode, pair);
27754 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27756 else
27758 pair = gen_reg_rtx (OImode);
27759 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27760 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27765 void
27766 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27768 machine_mode vmode = GET_MODE (target);
27769 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27770 bool one_vector_p = rtx_equal_p (op0, op1);
27771 rtx rmask[MAX_VECT_LEN], mask;
27773 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27774 numbering of elements for big-endian, we must reverse the order. */
27775 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27777 /* The VTBL instruction does not use a modulo index, so we must take care
27778 of that ourselves. */
27779 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27780 for (i = 0; i < nelt; ++i)
27781 rmask[i] = mask;
27782 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27783 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27785 arm_expand_vec_perm_1 (target, op0, op1, sel);
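/* Worked example for the function above (illustrative): for a V8QImode
   permute with a single input vector, the mask built here is
   {7,7,7,7,7,7,7,7}, so each selector byte is reduced modulo 8 before the
   VTBL.  This matters because VEC_PERM_EXPR defines out-of-range indices to
   wrap, whereas VTBL would write zero for them.  */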
27788 /* Generate or test for an insn that supports a constant permutation. */
27790 /* Recognize patterns for the VUZP insns. */
27792 static bool
27793 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27795 unsigned int i, odd, mask, nelt = d->nelt;
27796 rtx out0, out1, in0, in1, x;
27797 rtx (*gen)(rtx, rtx, rtx, rtx);
27799 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27800 return false;
27802 /* Note that these are little-endian tests. Adjust for big-endian later. */
27803 if (d->perm[0] == 0)
27804 odd = 0;
27805 else if (d->perm[0] == 1)
27806 odd = 1;
27807 else
27808 return false;
27809 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27811 for (i = 0; i < nelt; i++)
27813 unsigned elt = (i * 2 + odd) & mask;
27814 if (d->perm[i] != elt)
27815 return false;
27818 /* Success! */
27819 if (d->testing_p)
27820 return true;
27822 switch (d->vmode)
27824 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27825 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27826 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27827 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27828 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27829 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27830 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27831 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27832 default:
27833 gcc_unreachable ();
27836 in0 = d->op0;
27837 in1 = d->op1;
27838 if (BYTES_BIG_ENDIAN)
27840 x = in0, in0 = in1, in1 = x;
27841 odd = !odd;
27844 out0 = d->target;
27845 out1 = gen_reg_rtx (d->vmode);
27846 if (odd)
27847 x = out0, out0 = out1, out1 = x;
27849 emit_insn (gen (out0, in0, in1, out1));
27850 return true;
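/* Example of a selector matched by the function above (illustrative): for
   V8QImode with two input vectors, perm = {0,2,4,6,8,10,12,14} (odd == 0)
   selects every even-indexed byte of the concatenation of op0 and op1,
   which is exactly the first output of VUZP.8; the odd-indexed half goes to
   the scratch output register.  */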
27853 /* Recognize patterns for the VZIP insns. */
27855 static bool
27856 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27858 unsigned int i, high, mask, nelt = d->nelt;
27859 rtx out0, out1, in0, in1, x;
27860 rtx (*gen)(rtx, rtx, rtx, rtx);
27862 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27863 return false;
27865 /* Note that these are little-endian tests. Adjust for big-endian later. */
27866 high = nelt / 2;
27867 if (d->perm[0] == high)
27869 else if (d->perm[0] == 0)
27870 high = 0;
27871 else
27872 return false;
27873 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27875 for (i = 0; i < nelt / 2; i++)
27877 unsigned elt = (i + high) & mask;
27878 if (d->perm[i * 2] != elt)
27879 return false;
27880 elt = (elt + nelt) & mask;
27881 if (d->perm[i * 2 + 1] != elt)
27882 return false;
27885 /* Success! */
27886 if (d->testing_p)
27887 return true;
27889 switch (d->vmode)
27891 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27892 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27893 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27894 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27895 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27896 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27897 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27898 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27899 default:
27900 gcc_unreachable ();
27903 in0 = d->op0;
27904 in1 = d->op1;
27905 if (BYTES_BIG_ENDIAN)
27907 x = in0, in0 = in1, in1 = x;
27908 high = !high;
27911 out0 = d->target;
27912 out1 = gen_reg_rtx (d->vmode);
27913 if (high)
27914 x = out0, out0 = out1, out1 = x;
27916 emit_insn (gen (out0, in0, in1, out1));
27917 return true;
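/* Example of a selector matched by the function above (illustrative): for
   V8QImode with two input vectors, perm = {0,8,1,9,2,10,3,11} (high == 0)
   interleaves the low halves of op0 and op1, which is the first output of
   VZIP.8; the interleave of the high halves goes to the scratch output
   register.  */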
27920 /* Recognize patterns for the VREV insns. */
27922 static bool
27923 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27925 unsigned int i, j, diff, nelt = d->nelt;
27926 rtx (*gen)(rtx, rtx);
27928 if (!d->one_vector_p)
27929 return false;
27931 diff = d->perm[0];
27932 switch (diff)
27934 case 7:
27935 switch (d->vmode)
27937 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27938 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27939 default:
27940 return false;
27942 break;
27943 case 3:
27944 switch (d->vmode)
27946 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27947 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27948 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27949 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27950 default:
27951 return false;
27953 break;
27954 case 1:
27955 switch (d->vmode)
27957 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27958 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27959 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27960 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27961 case V4SImode: gen = gen_neon_vrev64v4si; break;
27962 case V2SImode: gen = gen_neon_vrev64v2si; break;
27963 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27964 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27965 default:
27966 return false;
27968 break;
27969 default:
27970 return false;
27973 for (i = 0; i < nelt ; i += diff + 1)
27974 for (j = 0; j <= diff; j += 1)
27976 /* This is guaranteed to hold, as the value of diff is 7, 3 or 1
27977 and we have enough elements in the permutation to generate
27978 this index. A vector mask with a value of diff other than
27979 these values implies that something has already gone wrong
27980 by the time we get here. */
27981 gcc_assert (i + j < nelt);
27982 if (d->perm[i + j] != i + diff - j)
27983 return false;
27986 /* Success! */
27987 if (d->testing_p)
27988 return true;
27990 emit_insn (gen (d->target, d->op0));
27991 return true;
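/* Example of a selector matched by the function above (illustrative): for
   V8QImode, perm = {3,2,1,0,7,6,5,4} gives diff == 3 and is emitted as
   VREV32.8, i.e. the bytes are reversed within each 32-bit word.  Likewise
   diff == 1 on V4HImode selects VREV32.16, and diff == 7 on V8QImode
   selects VREV64.8.  */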
27994 /* Recognize patterns for the VTRN insns. */
27996 static bool
27997 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27999 unsigned int i, odd, mask, nelt = d->nelt;
28000 rtx out0, out1, in0, in1, x;
28001 rtx (*gen)(rtx, rtx, rtx, rtx);
28003 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28004 return false;
28006 /* Note that these are little-endian tests. Adjust for big-endian later. */
28007 if (d->perm[0] == 0)
28008 odd = 0;
28009 else if (d->perm[0] == 1)
28010 odd = 1;
28011 else
28012 return false;
28013 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28015 for (i = 0; i < nelt; i += 2)
28017 if (d->perm[i] != i + odd)
28018 return false;
28019 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28020 return false;
28023 /* Success! */
28024 if (d->testing_p)
28025 return true;
28027 switch (d->vmode)
28029 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28030 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28031 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28032 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28033 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28034 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28035 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28036 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28037 default:
28038 gcc_unreachable ();
28041 in0 = d->op0;
28042 in1 = d->op1;
28043 if (BYTES_BIG_ENDIAN)
28045 x = in0, in0 = in1, in1 = x;
28046 odd = !odd;
28049 out0 = d->target;
28050 out1 = gen_reg_rtx (d->vmode);
28051 if (odd)
28052 x = out0, out0 = out1, out1 = x;
28054 emit_insn (gen (out0, in0, in1, out1));
28055 return true;
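/* Example of a selector matched by the function above (illustrative): for
   V4SImode with two input vectors, perm = {0,4,2,6} (odd == 0) picks the
   even-numbered lanes of op0 paired with the even-numbered lanes of op1,
   which is the first output of VTRN.32; the odd-lane pairing goes to the
   scratch output register.  */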
28058 /* Recognize patterns for the VEXT insns. */
28060 static bool
28061 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28063 unsigned int i, nelt = d->nelt;
28064 rtx (*gen) (rtx, rtx, rtx, rtx);
28065 rtx offset;
28067 unsigned int location;
28069 unsigned int next = d->perm[0] + 1;
28071 /* TODO: Handle GCC's numbering of elements for big-endian. */
28072 if (BYTES_BIG_ENDIAN)
28073 return false;
28075 /* Check if the extracted indexes are increasing by one. */
28076 for (i = 1; i < nelt; next++, i++)
28078 /* If we hit the most significant element of the 2nd vector in
28079 the previous iteration, no need to test further. */
28080 if (next == 2 * nelt)
28081 return false;
28083 /* If we are operating on only one vector, it could be a
28084 rotation. If there are only two elements of size < 64, let
28085 arm_evpc_neon_vrev catch it. */
28086 if (d->one_vector_p && (next == nelt))
28088 if ((nelt == 2) && (d->vmode != V2DImode))
28089 return false;
28090 else
28091 next = 0;
28094 if (d->perm[i] != next)
28095 return false;
28098 location = d->perm[0];
28100 switch (d->vmode)
28102 case V16QImode: gen = gen_neon_vextv16qi; break;
28103 case V8QImode: gen = gen_neon_vextv8qi; break;
28104 case V4HImode: gen = gen_neon_vextv4hi; break;
28105 case V8HImode: gen = gen_neon_vextv8hi; break;
28106 case V2SImode: gen = gen_neon_vextv2si; break;
28107 case V4SImode: gen = gen_neon_vextv4si; break;
28108 case V2SFmode: gen = gen_neon_vextv2sf; break;
28109 case V4SFmode: gen = gen_neon_vextv4sf; break;
28110 case V2DImode: gen = gen_neon_vextv2di; break;
28111 default:
28112 return false;
28115 /* Success! */
28116 if (d->testing_p)
28117 return true;
28119 offset = GEN_INT (location);
28120 emit_insn (gen (d->target, d->op0, d->op1, offset));
28121 return true;
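/* Example of a selector matched by the function above (illustrative): for
   V8QImode with two input vectors, perm = {3,4,5,6,7,8,9,10} is a window of
   eight consecutive bytes starting at index 3 of the concatenation of op0
   and op1, and is emitted as VEXT.8 with #3.  */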
28124 /* The NEON VTBL instruction is a fully variable permutation that's even
28125 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28126 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28127 can do slightly better by expanding this as a constant where we don't
28128 have to apply a mask. */
28130 static bool
28131 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28133 rtx rperm[MAX_VECT_LEN], sel;
28134 machine_mode vmode = d->vmode;
28135 unsigned int i, nelt = d->nelt;
28137 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28138 numbering of elements for big-endian, we must reverse the order. */
28139 if (BYTES_BIG_ENDIAN)
28140 return false;
28142 if (d->testing_p)
28143 return true;
28145 /* Generic code will try constant permutation twice. Once with the
28146 original mode and again with the elements lowered to QImode.
28147 So wait and don't do the selector expansion ourselves. */
28148 if (vmode != V8QImode && vmode != V16QImode)
28149 return false;
28151 for (i = 0; i < nelt; ++i)
28152 rperm[i] = GEN_INT (d->perm[i]);
28153 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28154 sel = force_reg (vmode, sel);
28156 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28157 return true;
28160 static bool
28161 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28163 /* Check if the input mask matches vext before reordering the
28164 operands. */
28165 if (TARGET_NEON)
28166 if (arm_evpc_neon_vext (d))
28167 return true;
28169 /* The pattern matching functions above are written to look for a small
28170 number to begin the sequence (0, 1, N/2). If we begin with an index
28171 from the second operand, we can swap the operands. */
28172 if (d->perm[0] >= d->nelt)
28174 unsigned i, nelt = d->nelt;
28175 rtx x;
28177 for (i = 0; i < nelt; ++i)
28178 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28180 x = d->op0;
28181 d->op0 = d->op1;
28182 d->op1 = x;
28185 if (TARGET_NEON)
28187 if (arm_evpc_neon_vuzp (d))
28188 return true;
28189 if (arm_evpc_neon_vzip (d))
28190 return true;
28191 if (arm_evpc_neon_vrev (d))
28192 return true;
28193 if (arm_evpc_neon_vtrn (d))
28194 return true;
28195 return arm_evpc_neon_vtbl (d);
28197 return false;
28200 /* Expand a vec_perm_const pattern. */
28202 bool
28203 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28205 struct expand_vec_perm_d d;
28206 int i, nelt, which;
28208 d.target = target;
28209 d.op0 = op0;
28210 d.op1 = op1;
28212 d.vmode = GET_MODE (target);
28213 gcc_assert (VECTOR_MODE_P (d.vmode));
28214 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28215 d.testing_p = false;
28217 for (i = which = 0; i < nelt; ++i)
28219 rtx e = XVECEXP (sel, 0, i);
28220 int ei = INTVAL (e) & (2 * nelt - 1);
28221 which |= (ei < nelt ? 1 : 2);
28222 d.perm[i] = ei;
28225 switch (which)
28227 default:
28228 gcc_unreachable();
28230 case 3:
28231 d.one_vector_p = false;
28232 if (!rtx_equal_p (op0, op1))
28233 break;
28235 /* The elements of PERM do not suggest that only the first operand
28236 is used, but both operands are identical. Allow easier matching
28237 of the permutation by folding the permutation into the single
28238 input vector. */
28239 /* FALLTHRU */
28240 case 2:
28241 for (i = 0; i < nelt; ++i)
28242 d.perm[i] &= nelt - 1;
28243 d.op0 = op1;
28244 d.one_vector_p = true;
28245 break;
28247 case 1:
28248 d.op1 = op0;
28249 d.one_vector_p = true;
28250 break;
28253 return arm_expand_vec_perm_const_1 (&d);
28256 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28258 static bool
28259 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28260 const unsigned char *sel)
28262 struct expand_vec_perm_d d;
28263 unsigned int i, nelt, which;
28264 bool ret;
28266 d.vmode = vmode;
28267 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28268 d.testing_p = true;
28269 memcpy (d.perm, sel, nelt);
28271 /* Categorize the set of elements in the selector. */
28272 for (i = which = 0; i < nelt; ++i)
28274 unsigned char e = d.perm[i];
28275 gcc_assert (e < 2 * nelt);
28276 which |= (e < nelt ? 1 : 2);
28279 /* If all elements are from the second vector, fold them onto the first. */
28280 if (which == 2)
28281 for (i = 0; i < nelt; ++i)
28282 d.perm[i] -= nelt;
28284 /* Check whether the mask can be applied to the vector type. */
28285 d.one_vector_p = (which != 3);
28287 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28288 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28289 if (!d.one_vector_p)
28290 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28292 start_sequence ();
28293 ret = arm_expand_vec_perm_const_1 (&d);
28294 end_sequence ();
28296 return ret;
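/* Note for the hook above: with testing_p set, each arm_evpc_* routine
   returns before emitting anything, so the start_sequence / end_sequence
   pair merely guards against stray instructions reaching the instruction
   stream while only testing the selector.  */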
28299 bool
28300 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28302 /* If we are soft float and we either have ldrd or the mode is no
28303 wider than a word, then all auto increment forms are ok. */
28304 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28305 return true;
28307 switch (code)
28309 /* Post increment and Pre Decrement are supported for all
28310 instruction forms except for vector forms. */
28311 case ARM_POST_INC:
28312 case ARM_PRE_DEC:
28313 if (VECTOR_MODE_P (mode))
28315 if (code != ARM_PRE_DEC)
28316 return true;
28317 else
28318 return false;
28321 return true;
28323 case ARM_POST_DEC:
28324 case ARM_PRE_INC:
28325 /* Without LDRD, if the mode size is greater than the
28326 word size there is no point in auto-incrementing
28327 because ldm and stm will not have these forms. */
28328 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28329 return false;
28331 /* Vector and floating point modes do not support
28332 these auto increment forms. */
28333 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28334 return false;
28336 return true;
28338 default:
28339 return false;
28343 return false;
28346 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
28347 on ARM, since we know that shifts by negative amounts are no-ops.
28348 Additionally, the default expansion code is not available or suitable
28349 for post-reload insn splits (this can occur when the register allocator
28350 chooses not to do a shift in NEON).
28352 This function is used in both initial expand and post-reload splits, and
28353 handles all kinds of 64-bit shifts.
28355 Input requirements:
28356 - It is safe for the input and output to be the same register, but
28357 early-clobber rules apply for the shift amount and scratch registers.
28358 - Shift by register requires both scratch registers. In all other cases
28359 the scratch registers may be NULL.
28360 - Ashiftrt by a register also clobbers the CC register. */
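/* A concrete example of the shift-by-constant case below (illustrative,
   written in C terms): a 64-bit left shift by 5 becomes

     out_high = (in_high << 5) | (in_low >> 27);
     out_low  = in_low << 5;

   while a left shift by 37 (an amount >= 32) becomes

     out_high = in_low << 5;
     out_low  = 0;

   and right shifts are handled symmetrically, with the roles of the high
   and low parts swapped.  */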
28361 void
28362 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28363 rtx amount, rtx scratch1, rtx scratch2)
28365 rtx out_high = gen_highpart (SImode, out);
28366 rtx out_low = gen_lowpart (SImode, out);
28367 rtx in_high = gen_highpart (SImode, in);
28368 rtx in_low = gen_lowpart (SImode, in);
28370 /* Terminology:
28371 in = the register pair containing the input value.
28372 out = the destination register pair.
28373 up = the high- or low-part of each pair.
28374 down = the opposite part to "up".
28375 In a shift, we can consider bits to shift from "up"-stream to
28376 "down"-stream, so in a left-shift "up" is the low-part and "down"
28377 is the high-part of each register pair. */
28379 rtx out_up = code == ASHIFT ? out_low : out_high;
28380 rtx out_down = code == ASHIFT ? out_high : out_low;
28381 rtx in_up = code == ASHIFT ? in_low : in_high;
28382 rtx in_down = code == ASHIFT ? in_high : in_low;
28384 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28385 gcc_assert (out
28386 && (REG_P (out) || GET_CODE (out) == SUBREG)
28387 && GET_MODE (out) == DImode);
28388 gcc_assert (in
28389 && (REG_P (in) || GET_CODE (in) == SUBREG)
28390 && GET_MODE (in) == DImode);
28391 gcc_assert (amount
28392 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28393 && GET_MODE (amount) == SImode)
28394 || CONST_INT_P (amount)));
28395 gcc_assert (scratch1 == NULL
28396 || (GET_CODE (scratch1) == SCRATCH)
28397 || (GET_MODE (scratch1) == SImode
28398 && REG_P (scratch1)));
28399 gcc_assert (scratch2 == NULL
28400 || (GET_CODE (scratch2) == SCRATCH)
28401 || (GET_MODE (scratch2) == SImode
28402 && REG_P (scratch2)));
28403 gcc_assert (!REG_P (out) || !REG_P (amount)
28404 || !HARD_REGISTER_P (out)
28405 || (REGNO (out) != REGNO (amount)
28406 && REGNO (out) + 1 != REGNO (amount)));
28408 /* Macros to make following code more readable. */
28409 #define SUB_32(DEST,SRC) \
28410 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28411 #define RSB_32(DEST,SRC) \
28412 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28413 #define SUB_S_32(DEST,SRC) \
28414 gen_addsi3_compare0 ((DEST), (SRC), \
28415 GEN_INT (-32))
28416 #define SET(DEST,SRC) \
28417 gen_rtx_SET (SImode, (DEST), (SRC))
28418 #define SHIFT(CODE,SRC,AMOUNT) \
28419 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28420 #define LSHIFT(CODE,SRC,AMOUNT) \
28421 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28422 SImode, (SRC), (AMOUNT))
28423 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28424 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28425 SImode, (SRC), (AMOUNT))
28426 #define ORR(A,B) \
28427 gen_rtx_IOR (SImode, (A), (B))
28428 #define BRANCH(COND,LABEL) \
28429 gen_arm_cond_branch ((LABEL), \
28430 gen_rtx_ ## COND (CCmode, cc_reg, \
28431 const0_rtx), \
28432 cc_reg)
28434 /* Shifts by register and shifts by constant are handled separately. */
28435 if (CONST_INT_P (amount))
28437 /* We have a shift-by-constant. */
28439 /* First, handle out-of-range shift amounts.
28440 In both cases we try to match the result that an ARM instruction in
28441 a shift-by-register would give. This helps reduce execution
28442 differences between optimization levels, but it won't stop other
28443 parts of the compiler doing different things. This is "undefined
28444 behaviour", in any case. */
28445 if (INTVAL (amount) <= 0)
28446 emit_insn (gen_movdi (out, in));
28447 else if (INTVAL (amount) >= 64)
28449 if (code == ASHIFTRT)
28451 rtx const31_rtx = GEN_INT (31);
28452 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28453 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28455 else
28456 emit_insn (gen_movdi (out, const0_rtx));
28459 /* Now handle valid shifts. */
28460 else if (INTVAL (amount) < 32)
28462 /* Shifts by a constant less than 32. */
28463 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28465 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28466 emit_insn (SET (out_down,
28467 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28468 out_down)));
28469 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28471 else
28473 /* Shifts by a constant greater than 31. */
28474 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28476 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28477 if (code == ASHIFTRT)
28478 emit_insn (gen_ashrsi3 (out_up, in_up,
28479 GEN_INT (31)));
28480 else
28481 emit_insn (SET (out_up, const0_rtx));
28484 else
28486 /* We have a shift-by-register. */
28487 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28489 /* This alternative requires the scratch registers. */
28490 gcc_assert (scratch1 && REG_P (scratch1));
28491 gcc_assert (scratch2 && REG_P (scratch2));
28493 /* We will need the values "amount-32" and "32-amount" later.
28494 Swapping them around now allows the later code to be more general. */
28495 switch (code)
28497 case ASHIFT:
28498 emit_insn (SUB_32 (scratch1, amount));
28499 emit_insn (RSB_32 (scratch2, amount));
28500 break;
28501 case ASHIFTRT:
28502 emit_insn (RSB_32 (scratch1, amount));
28503 /* Also set CC = amount > 32. */
28504 emit_insn (SUB_S_32 (scratch2, amount));
28505 break;
28506 case LSHIFTRT:
28507 emit_insn (RSB_32 (scratch1, amount));
28508 emit_insn (SUB_32 (scratch2, amount));
28509 break;
28510 default:
28511 gcc_unreachable ();
28514 /* Emit code like this:
28516 arithmetic-left:
28517 out_down = in_down << amount;
28518 out_down = (in_up << (amount - 32)) | out_down;
28519 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28520 out_up = in_up << amount;
28522 arithmetic-right:
28523 out_down = in_down >> amount;
28524 out_down = (in_up << (32 - amount)) | out_down;
28525 if (amount < 32)
28526 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28527 out_up = in_up << amount;
28529 logical-right:
28530 out_down = in_down >> amount;
28531 out_down = (in_up << (32 - amount)) | out_down;
28532 if (amount < 32)
28533 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28534 out_up = in_up << amount;
28536 The ARM and Thumb2 variants are the same but implemented slightly
28537 differently. If this were only called during expand we could just
28538 use the Thumb2 case and let combine do the right thing, but this
28539 can also be called from post-reload splitters. */
28541 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28543 if (!TARGET_THUMB2)
28545 /* Emit code for ARM mode. */
28546 emit_insn (SET (out_down,
28547 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28548 if (code == ASHIFTRT)
28550 rtx_code_label *done_label = gen_label_rtx ();
28551 emit_jump_insn (BRANCH (LT, done_label));
28552 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28553 out_down)));
28554 emit_label (done_label);
28556 else
28557 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28558 out_down)));
28560 else
28562 /* Emit code for Thumb2 mode.
28563 Thumb2 can't do shift and or in one insn. */
28564 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28565 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28567 if (code == ASHIFTRT)
28569 rtx_code_label *done_label = gen_label_rtx ();
28570 emit_jump_insn (BRANCH (LT, done_label));
28571 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28572 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28573 emit_label (done_label);
28575 else
28577 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28578 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28582 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28585 #undef SUB_32
28586 #undef RSB_32
28587 #undef SUB_S_32
28588 #undef SET
28589 #undef SHIFT
28590 #undef LSHIFT
28591 #undef REV_LSHIFT
28592 #undef ORR
28593 #undef BRANCH
28597 /* Returns true if *COMPARISON is a valid comparison operation, and
28598 puts the operands into a form that is valid. */
28599 bool
28600 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28602 enum rtx_code code = GET_CODE (*comparison);
28603 int code_int;
28604 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28605 ? GET_MODE (*op2) : GET_MODE (*op1);
28607 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28609 if (code == UNEQ || code == LTGT)
28610 return false;
28612 code_int = (int)code;
28613 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28614 PUT_CODE (*comparison, (enum rtx_code)code_int);
28616 switch (mode)
28618 case SImode:
28619 if (!arm_add_operand (*op1, mode))
28620 *op1 = force_reg (mode, *op1);
28621 if (!arm_add_operand (*op2, mode))
28622 *op2 = force_reg (mode, *op2);
28623 return true;
28625 case DImode:
28626 if (!cmpdi_operand (*op1, mode))
28627 *op1 = force_reg (mode, *op1);
28628 if (!cmpdi_operand (*op2, mode))
28629 *op2 = force_reg (mode, *op2);
28630 return true;
28632 case SFmode:
28633 case DFmode:
28634 if (!arm_float_compare_operand (*op1, mode))
28635 *op1 = force_reg (mode, *op1);
28636 if (!arm_float_compare_operand (*op2, mode))
28637 *op2 = force_reg (mode, *op2);
28638 return true;
28639 default:
28640 break;
28643 return false;
28647 /* Maximum number of instructions to set block of memory. */
28648 static int
28649 arm_block_set_max_insns (void)
28651 if (optimize_function_for_size_p (cfun))
28652 return 4;
28653 else
28654 return current_tune->max_insns_inline_memset;
28657 /* Return TRUE if it's profitable to set block of memory for
28658 non-vectorized case. VAL is the value to set the memory
28659 with. LENGTH is the number of bytes to set. ALIGN is the
28660 alignment of the destination memory in bytes. UNALIGNED_P
28661 is TRUE if we can only set the memory with instructions
28662 meeting alignment requirements. USE_STRD_P is TRUE if we
28663 can use strd to set the memory. */
28664 static bool
28665 arm_block_set_non_vect_profit_p (rtx val,
28666 unsigned HOST_WIDE_INT length,
28667 unsigned HOST_WIDE_INT align,
28668 bool unaligned_p, bool use_strd_p)
28670 int num = 0;
28671 /* For a leftover of 0-7 bytes, the table gives the minimum number of
28672 strb/strh/str instructions needed to set it. */
28673 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28675 if (unaligned_p)
28677 num = arm_const_inline_cost (SET, val);
28678 num += length / align + length % align;
28680 else if (use_strd_p)
28682 num = arm_const_double_inline_cost (val);
28683 num += (length >> 3) + leftover[length & 7];
28685 else
28687 num = arm_const_inline_cost (SET, val);
28688 num += (length >> 2) + leftover[length & 3];
28691 /* We may be able to combine last pair STRH/STRB into a single STR
28692 by shifting one byte back. */
28693 if (unaligned_access && length > 3 && (length & 3) == 3)
28694 num--;
28696 return (num <= arm_block_set_max_insns ());
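/* Worked example for the function above (illustrative): for a word-aligned
   15-byte memset without strd, the store count is
   (15 >> 2) + leftover[15 & 3] = 3 + 2, plus the cost of materializing the
   constant; if unaligned access is available the final strh/strb pair is
   assumed to merge into a single overlapping str, so one store is
   subtracted again.  */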
28699 /* Return TRUE if it's profitable to set block of memory for
28700 vectorized case. LENGTH is the number of bytes to set.
28701 ALIGN is the alignment of destination memory in bytes.
28702 MODE is the vector mode used to set the memory. */
28703 static bool
28704 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28705 unsigned HOST_WIDE_INT align,
28706 machine_mode mode)
28708 int num;
28709 bool unaligned_p = ((align & 3) != 0);
28710 unsigned int nelt = GET_MODE_NUNITS (mode);
28712 /* Instruction loading constant value. */
28713 num = 1;
28714 /* Instructions storing the memory. */
28715 num += (length + nelt - 1) / nelt;
28716 /* Instructions adjusting the address expression. We only need to
28717 adjust the address expression if it's 4-byte aligned and the
28718 leftover bytes can only be stored by a misaligned store instruction. */
28719 if (!unaligned_p && (length & 3) != 0)
28720 num++;
28722 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28723 if (!unaligned_p && mode == V16QImode)
28724 num--;
28726 return (num <= arm_block_set_max_insns ());
28729 /* Set a block of memory using vectorization instructions for the
28730 unaligned case. We fill the first LENGTH bytes of the memory
28731 area starting from DSTBASE with byte constant VALUE. ALIGN is
28732 the alignment requirement of memory. Return TRUE if succeeded. */
28733 static bool
28734 arm_block_set_unaligned_vect (rtx dstbase,
28735 unsigned HOST_WIDE_INT length,
28736 unsigned HOST_WIDE_INT value,
28737 unsigned HOST_WIDE_INT align)
28739 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28740 rtx dst, mem;
28741 rtx val_elt, val_vec, reg;
28742 rtx rval[MAX_VECT_LEN];
28743 rtx (*gen_func) (rtx, rtx);
28744 machine_mode mode;
28745 unsigned HOST_WIDE_INT v = value;
28747 gcc_assert ((align & 0x3) != 0);
28748 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28749 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28750 if (length >= nelt_v16)
28752 mode = V16QImode;
28753 gen_func = gen_movmisalignv16qi;
28755 else
28757 mode = V8QImode;
28758 gen_func = gen_movmisalignv8qi;
28760 nelt_mode = GET_MODE_NUNITS (mode);
28761 gcc_assert (length >= nelt_mode);
28762 /* Skip if it isn't profitable. */
28763 if (!arm_block_set_vect_profit_p (length, align, mode))
28764 return false;
28766 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28767 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28769 v = sext_hwi (v, BITS_PER_WORD);
28770 val_elt = GEN_INT (v);
28771 for (j = 0; j < nelt_mode; j++)
28772 rval[j] = val_elt;
28774 reg = gen_reg_rtx (mode);
28775 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28776 /* Emit instruction loading the constant value. */
28777 emit_move_insn (reg, val_vec);
28779 /* Handle nelt_mode bytes in a vector. */
28780 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28782 emit_insn ((*gen_func) (mem, reg));
28783 if (i + 2 * nelt_mode <= length)
28784 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28787 /* If at least nelt_v8 bytes are left over, we must be in
28788 V16QI mode. */
28789 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28791 /* Handle (8, 16) bytes leftover. */
28792 if (i + nelt_v8 < length)
28794 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28795 /* We are shifting bytes back, set the alignment accordingly. */
28796 if ((length & 1) != 0 && align >= 2)
28797 set_mem_align (mem, BITS_PER_UNIT);
28799 emit_insn (gen_movmisalignv16qi (mem, reg));
28801 /* Handle (0, 8] bytes leftover. */
28802 else if (i < length && i + nelt_v8 >= length)
28804 if (mode == V16QImode)
28806 reg = gen_lowpart (V8QImode, reg);
28807 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28809 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28810 + (nelt_mode - nelt_v8))));
28811 /* We are shifting bytes back, set the alignment accordingly. */
28812 if ((length & 1) != 0 && align >= 2)
28813 set_mem_align (mem, BITS_PER_UNIT);
28815 emit_insn (gen_movmisalignv8qi (mem, reg));
28818 return true;
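/* Worked example for the function above (illustrative): for a 29-byte
   misaligned memset, one V16QImode vst1 covers bytes 0-15, and the 13-byte
   leftover is handled by stepping the address forward by 13 and issuing a
   second 16-byte store covering bytes 13-28, i.e. the two stores
   deliberately overlap rather than falling back to scalar code.  */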
28821 /* Set a block of memory using vectorization instructions for the
28822 aligned case. We fill the first LENGTH bytes of the memory area
28823 starting from DSTBASE with byte constant VALUE. ALIGN is the
28824 alignment requirement of memory. Return TRUE if succeeded. */
28825 static bool
28826 arm_block_set_aligned_vect (rtx dstbase,
28827 unsigned HOST_WIDE_INT length,
28828 unsigned HOST_WIDE_INT value,
28829 unsigned HOST_WIDE_INT align)
28831 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28832 rtx dst, addr, mem;
28833 rtx val_elt, val_vec, reg;
28834 rtx rval[MAX_VECT_LEN];
28835 machine_mode mode;
28836 unsigned HOST_WIDE_INT v = value;
28838 gcc_assert ((align & 0x3) == 0);
28839 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28840 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28841 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28842 mode = V16QImode;
28843 else
28844 mode = V8QImode;
28846 nelt_mode = GET_MODE_NUNITS (mode);
28847 gcc_assert (length >= nelt_mode);
28848 /* Skip if it isn't profitable. */
28849 if (!arm_block_set_vect_profit_p (length, align, mode))
28850 return false;
28852 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28854 v = sext_hwi (v, BITS_PER_WORD);
28855 val_elt = GEN_INT (v);
28856 for (j = 0; j < nelt_mode; j++)
28857 rval[j] = val_elt;
28859 reg = gen_reg_rtx (mode);
28860 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28861 /* Emit instruction loading the constant value. */
28862 emit_move_insn (reg, val_vec);
28864 i = 0;
28865 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28866 if (mode == V16QImode)
28868 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28869 emit_insn (gen_movmisalignv16qi (mem, reg));
28870 i += nelt_mode;
28871 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28872 if (i + nelt_v8 < length && i + nelt_v16 > length)
28874 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28875 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28876 /* We are shifting bytes back, set the alignment accordingly. */
28877 if ((length & 0x3) == 0)
28878 set_mem_align (mem, BITS_PER_UNIT * 4);
28879 else if ((length & 0x1) == 0)
28880 set_mem_align (mem, BITS_PER_UNIT * 2);
28881 else
28882 set_mem_align (mem, BITS_PER_UNIT);
28884 emit_insn (gen_movmisalignv16qi (mem, reg));
28885 return true;
28887 /* Fall through for bytes leftover. */
28888 mode = V8QImode;
28889 nelt_mode = GET_MODE_NUNITS (mode);
28890 reg = gen_lowpart (V8QImode, reg);
28893 /* Handle 8 bytes in a vector. */
28894 for (; (i + nelt_mode <= length); i += nelt_mode)
28896 addr = plus_constant (Pmode, dst, i);
28897 mem = adjust_automodify_address (dstbase, mode, addr, i);
28898 emit_move_insn (mem, reg);
28901 /* Handle single word leftover by shifting 4 bytes back. We can
28902 use aligned access for this case. */
28903 if (i + UNITS_PER_WORD == length)
28905 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28906 mem = adjust_automodify_address (dstbase, mode,
28907 addr, i - UNITS_PER_WORD);
28908 /* We are shifting 4 bytes back, set the alignment accordingly. */
28909 if (align > UNITS_PER_WORD)
28910 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28912 emit_move_insn (mem, reg);
28914 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28915 We have to use unaligned access for this case. */
28916 else if (i < length)
28918 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28919 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28920 /* We are shifting bytes back, set the alignment accordingly. */
28921 if ((length & 1) == 0)
28922 set_mem_align (mem, BITS_PER_UNIT * 2);
28923 else
28924 set_mem_align (mem, BITS_PER_UNIT);
28926 emit_insn (gen_movmisalignv8qi (mem, reg));
28929 return true;
28932 /* Set a block of memory using plain strh/strb instructions, only
28933 using instructions allowed by ALIGN on the processor. We fill the
28934 first LENGTH bytes of the memory area starting from DSTBASE
28935 with byte constant VALUE. ALIGN is the alignment requirement
28936 of memory. */
28937 static bool
28938 arm_block_set_unaligned_non_vect (rtx dstbase,
28939 unsigned HOST_WIDE_INT length,
28940 unsigned HOST_WIDE_INT value,
28941 unsigned HOST_WIDE_INT align)
28943 unsigned int i;
28944 rtx dst, addr, mem;
28945 rtx val_exp, val_reg, reg;
28946 machine_mode mode;
28947 HOST_WIDE_INT v = value;
28949 gcc_assert (align == 1 || align == 2);
28951 if (align == 2)
28952 v |= (value << BITS_PER_UNIT);
28954 v = sext_hwi (v, BITS_PER_WORD);
28955 val_exp = GEN_INT (v);
28956 /* Skip if it isn't profitable. */
28957 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28958 align, true, false))
28959 return false;
28961 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28962 mode = (align == 2 ? HImode : QImode);
28963 val_reg = force_reg (SImode, val_exp);
28964 reg = gen_lowpart (mode, val_reg);
28966 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28968 addr = plus_constant (Pmode, dst, i);
28969 mem = adjust_automodify_address (dstbase, mode, addr, i);
28970 emit_move_insn (mem, reg);
28973 /* Handle single byte leftover. */
28974 if (i + 1 == length)
28976 reg = gen_lowpart (QImode, val_reg);
28977 addr = plus_constant (Pmode, dst, i);
28978 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28979 emit_move_insn (mem, reg);
28980 i++;
28983 gcc_assert (i == length);
28984 return true;
28987 /* Set a block of memory using plain strd/str/strh/strb instructions,
28988 using unaligned stores where the processor supports unaligned
28989 semantics for those instructions. We fill the first LENGTH bytes
28990 of the memory area starting from DSTBASE with byte constant VALUE.
28991 ALIGN is the alignment requirement of memory. */
28992 static bool
28993 arm_block_set_aligned_non_vect (rtx dstbase,
28994 unsigned HOST_WIDE_INT length,
28995 unsigned HOST_WIDE_INT value,
28996 unsigned HOST_WIDE_INT align)
28998 unsigned int i;
28999 rtx dst, addr, mem;
29000 rtx val_exp, val_reg, reg;
29001 unsigned HOST_WIDE_INT v;
29002 bool use_strd_p;
29004 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29005 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29007 v = (value | (value << 8) | (value << 16) | (value << 24));
29008 if (length < UNITS_PER_WORD)
29009 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29011 if (use_strd_p)
29012 v |= (v << BITS_PER_WORD);
29013 else
29014 v = sext_hwi (v, BITS_PER_WORD);
29016 val_exp = GEN_INT (v);
29017 /* Skip if it isn't profitable. */
29018 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29019 align, false, use_strd_p))
29021 if (!use_strd_p)
29022 return false;
29024 /* Try without strd. */
29025 v = (v >> BITS_PER_WORD);
29026 v = sext_hwi (v, BITS_PER_WORD);
29027 val_exp = GEN_INT (v);
29028 use_strd_p = false;
29029 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29030 align, false, use_strd_p))
29031 return false;
29034 i = 0;
29035 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29036 /* Handle double words using strd if possible. */
29037 if (use_strd_p)
29039 val_reg = force_reg (DImode, val_exp);
29040 reg = val_reg;
29041 for (; (i + 8 <= length); i += 8)
29043 addr = plus_constant (Pmode, dst, i);
29044 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29045 emit_move_insn (mem, reg);
29048 else
29049 val_reg = force_reg (SImode, val_exp);
29051 /* Handle words. */
29052 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29053 for (; (i + 4 <= length); i += 4)
29055 addr = plus_constant (Pmode, dst, i);
29056 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29057 if ((align & 3) == 0)
29058 emit_move_insn (mem, reg);
29059 else
29060 emit_insn (gen_unaligned_storesi (mem, reg));
29063 /* Merge last pair of STRH and STRB into a STR if possible. */
29064 if (unaligned_access && i > 0 && (i + 3) == length)
29066 addr = plus_constant (Pmode, dst, i - 1);
29067 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29068 /* We are shifting one byte back, set the alignment accordingly. */
29069 if ((align & 1) == 0)
29070 set_mem_align (mem, BITS_PER_UNIT);
29072 /* Most likely this is an unaligned access, and we can't tell at
29073 compilation time. */
29074 emit_insn (gen_unaligned_storesi (mem, reg));
29075 return true;
29078 /* Handle half word leftover. */
29079 if (i + 2 <= length)
29081 reg = gen_lowpart (HImode, val_reg);
29082 addr = plus_constant (Pmode, dst, i);
29083 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29084 if ((align & 1) == 0)
29085 emit_move_insn (mem, reg);
29086 else
29087 emit_insn (gen_unaligned_storehi (mem, reg));
29089 i += 2;
29092 /* Handle single byte leftover. */
29093 if (i + 1 == length)
29095 reg = gen_lowpart (QImode, val_reg);
29096 addr = plus_constant (Pmode, dst, i);
29097 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29098 emit_move_insn (mem, reg);
29101 return true;
29104 /* Set a block of memory using vectorization instructions for both
29105 aligned and unaligned cases. We fill the first LENGTH bytes of
29106 the memory area starting from DSTBASE with byte constant VALUE.
29107 ALIGN is the alignment requirement of memory. */
29108 static bool
29109 arm_block_set_vect (rtx dstbase,
29110 unsigned HOST_WIDE_INT length,
29111 unsigned HOST_WIDE_INT value,
29112 unsigned HOST_WIDE_INT align)
29114 /* Check whether we need to use unaligned store instruction. */
29115 if (((align & 3) != 0 || (length & 3) != 0)
29116 /* Check whether unaligned store instruction is available. */
29117 && (!unaligned_access || BYTES_BIG_ENDIAN))
29118 return false;
29120 if ((align & 3) == 0)
29121 return arm_block_set_aligned_vect (dstbase, length, value, align);
29122 else
29123 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29126 /* Expand a memory-set (setmem) operation. First we try to do it using
29127 vectorization instructions, then fall back to ARM unaligned access and
29128 double-word stores if profitable. OPERANDS[0] is the destination,
29129 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29130 initialize the memory with, OPERANDS[3] is the known alignment of the
29131 destination. */
29132 bool
29133 arm_gen_setmem (rtx *operands)
29135 rtx dstbase = operands[0];
29136 unsigned HOST_WIDE_INT length;
29137 unsigned HOST_WIDE_INT value;
29138 unsigned HOST_WIDE_INT align;
29140 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29141 return false;
29143 length = UINTVAL (operands[1]);
29144 if (length > 64)
29145 return false;
29147 value = (UINTVAL (operands[2]) & 0xFF);
29148 align = UINTVAL (operands[3]);
29149 if (TARGET_NEON && length >= 8
29150 && current_tune->string_ops_prefer_neon
29151 && arm_block_set_vect (dstbase, length, value, align))
29152 return true;
29154 if (!unaligned_access && (align & 3) != 0)
29155 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29157 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29160 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29162 static unsigned HOST_WIDE_INT
29163 arm_asan_shadow_offset (void)
29165 return (unsigned HOST_WIDE_INT) 1 << 29;
29169 /* This is a temporary fix for PR60655. Ideally we need
29170 to handle most of these cases in the generic part but
29171 currently we reject minus (..) (sym_ref). We try to
29172 ameliorate the case with minus (sym_ref1) (sym_ref2)
29173 where they are in the same section. */
29175 static bool
29176 arm_const_not_ok_for_debug_p (rtx p)
29178 tree decl_op0 = NULL;
29179 tree decl_op1 = NULL;
29181 if (GET_CODE (p) == MINUS)
29183 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29185 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29186 if (decl_op1
29187 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29188 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29190 if ((TREE_CODE (decl_op1) == VAR_DECL
29191 || TREE_CODE (decl_op1) == CONST_DECL)
29192 && (TREE_CODE (decl_op0) == VAR_DECL
29193 || TREE_CODE (decl_op0) == CONST_DECL))
29194 return (get_variable_section (decl_op1, false)
29195 != get_variable_section (decl_op0, false));
29197 if (TREE_CODE (decl_op1) == LABEL_DECL
29198 && TREE_CODE (decl_op0) == LABEL_DECL)
29199 return (DECL_CONTEXT (decl_op1)
29200 != DECL_CONTEXT (decl_op0));
29203 return true;
29207 return false;
29210 /* Return TRUE if X is a reference to a value in a constant pool. */
29211 extern bool
29212 arm_is_constant_pool_ref (rtx x)
29214 return (MEM_P (x)
29215 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29216 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29219 /* If MEM is in the form of [base+offset], extract the two parts
29220 of the address and store them in BASE and OFFSET; otherwise return
29221 false after clearing BASE and OFFSET. */
29223 static bool
29224 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29226 rtx addr;
29228 gcc_assert (MEM_P (mem));
29230 addr = XEXP (mem, 0);
29232 /* Strip off const from addresses like (const (addr)). */
29233 if (GET_CODE (addr) == CONST)
29234 addr = XEXP (addr, 0);
29236 if (GET_CODE (addr) == REG)
29238 *base = addr;
29239 *offset = const0_rtx;
29240 return true;
29243 if (GET_CODE (addr) == PLUS
29244 && GET_CODE (XEXP (addr, 0)) == REG
29245 && CONST_INT_P (XEXP (addr, 1)))
29247 *base = XEXP (addr, 0);
29248 *offset = XEXP (addr, 1);
29249 return true;
29252 *base = NULL_RTX;
29253 *offset = NULL_RTX;
29255 return false;
29258 /* If INSN is a load or store whose address is in the form [base+offset],
29259 extract the two parts into BASE and OFFSET. Set IS_LOAD to TRUE if
29260 it's a load. Return TRUE if INSN is such an instruction,
29261 otherwise return FALSE. */
29263 static bool
29264 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29266 rtx x, dest, src;
29268 gcc_assert (INSN_P (insn));
29269 x = PATTERN (insn);
29270 if (GET_CODE (x) != SET)
29271 return false;
29273 src = SET_SRC (x);
29274 dest = SET_DEST (x);
29275 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29277 *is_load = false;
29278 extract_base_offset_in_addr (dest, base, offset);
29280 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29282 *is_load = true;
29283 extract_base_offset_in_addr (src, base, offset);
29285 else
29286 return false;
29288 return (*base != NULL_RTX && *offset != NULL_RTX);
29291 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29293 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29294 and PRI are only calculated for these instructions. For other instructions,
29295 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
29296 instruction fusion can be supported by returning different priorities.
29298 It's important that irrelevant instructions get the largest FUSION_PRI. */
29300 static void
29301 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29302 int *fusion_pri, int *pri)
29304 int tmp, off_val;
29305 bool is_load;
29306 rtx base, offset;
29308 gcc_assert (INSN_P (insn));
29310 tmp = max_pri - 1;
29311 if (!fusion_load_store (insn, &base, &offset, &is_load))
29313 *pri = tmp;
29314 *fusion_pri = tmp;
29315 return;
29318 /* Load goes first. */
29319 if (is_load)
29320 *fusion_pri = tmp - 1;
29321 else
29322 *fusion_pri = tmp - 2;
29324 tmp /= 2;
29326 /* INSN with smaller base register goes first. */
29327 tmp -= ((REGNO (base) & 0xff) << 20);
29329 /* INSN with smaller offset goes first. */
29330 off_val = (int)(INTVAL (offset));
29331 if (off_val >= 0)
29332 tmp -= (off_val & 0xfffff);
29333 else
29334 tmp += ((- off_val) & 0xfffff);
29336 *pri = tmp;
29337 return;
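/* Worked example for the hook above (illustrative): two SImode loads from
   [r1, #4] and [r1, #8] both receive FUSION_PRI == max_pri - 2, so the
   scheduler treats them as candidates for fusion, while their PRI values
   differ by their offsets (the smaller offset gives the larger PRI), so the
   load at the lower address is ordered first - presumably so that
   back-to-back accesses can later be combined, e.g. into ldrd or ldm
   forms.  */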
29339 #include "gt-arm.h"