2016-10-19 Bernd Edlinger <bernd.edlinger@hotmail.de>
[official-gcc.git] / gcc / config / arm / arm.c
blob022c1d72a1272e56397dc7e2018483e77f18b90d
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2016 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic-core.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "output.h"
47 #include "insn-attr.h"
48 #include "flags.h"
49 #include "reload.h"
50 #include "explow.h"
51 #include "expr.h"
52 #include "cfgrtl.h"
53 #include "sched-int.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60 #include "dumpfile.h"
61 #include "target-globals.h"
62 #include "builtins.h"
63 #include "tm-constrs.h"
64 #include "rtl-iter.h"
66 /* This file should be included last. */
67 #include "target-def.h"
69 /* Forward definitions of types. */
70 typedef struct minipool_node Mnode;
71 typedef struct minipool_fixup Mfix;
73 void (*arm_lang_output_object_attributes_hook)(void);
75 struct four_ints
77 int i[4];
80 /* Forward function declarations. */
81 static bool arm_const_not_ok_for_debug_p (rtx);
82 static bool arm_needs_doubleword_align (machine_mode, const_tree);
83 static int arm_compute_static_chain_stack_bytes (void);
84 static arm_stack_offsets *arm_get_frame_offsets (void);
85 static void arm_add_gc_roots (void);
86 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
87 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
88 static unsigned bit_count (unsigned long);
89 static unsigned feature_count (const arm_feature_set*);
90 static int arm_address_register_rtx_p (rtx, int);
91 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
92 static bool is_called_in_ARM_mode (tree);
93 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
94 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
95 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
96 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
97 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
98 inline static int thumb1_index_register_rtx_p (rtx, int);
99 static int thumb_far_jump_used_p (void);
100 static bool thumb_force_lr_save (void);
101 static unsigned arm_size_return_regs (void);
102 static bool arm_assemble_integer (rtx, unsigned int, int);
103 static void arm_print_operand (FILE *, rtx, int);
104 static void arm_print_operand_address (FILE *, machine_mode, rtx);
105 static bool arm_print_operand_punct_valid_p (unsigned char code);
106 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
107 static arm_cc get_arm_condition_code (rtx);
108 static const char *output_multi_immediate (rtx *, const char *, const char *,
109 int, HOST_WIDE_INT);
110 static const char *shift_op (rtx, HOST_WIDE_INT *);
111 static struct machine_function *arm_init_machine_status (void);
112 static void thumb_exit (FILE *, int);
113 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
114 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
115 static Mnode *add_minipool_forward_ref (Mfix *);
116 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
117 static Mnode *add_minipool_backward_ref (Mfix *);
118 static void assign_minipool_offsets (Mfix *);
119 static void arm_print_value (FILE *, rtx);
120 static void dump_minipool (rtx_insn *);
121 static int arm_barrier_cost (rtx_insn *);
122 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
123 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
124 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
125 machine_mode, rtx);
126 static void arm_reorg (void);
127 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
128 static unsigned long arm_compute_save_reg0_reg12_mask (void);
129 static unsigned long arm_compute_save_reg_mask (void);
130 static unsigned long arm_isr_value (tree);
131 static unsigned long arm_compute_func_type (void);
132 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
133 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
134 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
135 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
136 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
137 #endif
138 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
139 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
140 static int arm_comp_type_attributes (const_tree, const_tree);
141 static void arm_set_default_type_attributes (tree);
142 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
143 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
144 static int optimal_immediate_sequence (enum rtx_code code,
145 unsigned HOST_WIDE_INT val,
146 struct four_ints *return_sequence);
147 static int optimal_immediate_sequence_1 (enum rtx_code code,
148 unsigned HOST_WIDE_INT val,
149 struct four_ints *return_sequence,
150 int i);
151 static int arm_get_strip_length (int);
152 static bool arm_function_ok_for_sibcall (tree, tree);
153 static machine_mode arm_promote_function_mode (const_tree,
154 machine_mode, int *,
155 const_tree, int);
156 static bool arm_return_in_memory (const_tree, const_tree);
157 static rtx arm_function_value (const_tree, const_tree, bool);
158 static rtx arm_libcall_value_1 (machine_mode);
159 static rtx arm_libcall_value (machine_mode, const_rtx);
160 static bool arm_function_value_regno_p (const unsigned int);
161 static void arm_internal_label (FILE *, const char *, unsigned long);
162 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
163 tree);
164 static bool arm_have_conditional_execution (void);
165 static bool arm_cannot_force_const_mem (machine_mode, rtx);
166 static bool arm_legitimate_constant_p (machine_mode, rtx);
167 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
168 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
169 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
170 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
171 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
172 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
173 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
174 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
175 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
176 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
177 static void emit_constant_insn (rtx cond, rtx pattern);
178 static rtx_insn *emit_set_insn (rtx, rtx);
179 static rtx emit_multi_reg_push (unsigned long, unsigned long);
180 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
181 tree, bool);
182 static rtx arm_function_arg (cumulative_args_t, machine_mode,
183 const_tree, bool);
184 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
185 const_tree, bool);
186 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
187 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
188 const_tree);
189 static rtx aapcs_libcall_value (machine_mode);
190 static int aapcs_select_return_coproc (const_tree, const_tree);
192 #ifdef OBJECT_FORMAT_ELF
193 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
194 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
195 #endif
196 #ifndef ARM_PE
197 static void arm_encode_section_info (tree, rtx, int);
198 #endif
200 static void arm_file_end (void);
201 static void arm_file_start (void);
202 static void arm_insert_attributes (tree, tree *);
204 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx_insn *);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 #endif
219 static void arm_asm_init_sections (void);
220 static rtx arm_dwarf_register_span (rtx);
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static void arm_override_options_after_change (void);
237 static void arm_option_print (FILE *, int, struct cl_target_option *);
238 static void arm_set_current_function (tree);
239 static bool arm_can_inline_p (tree, tree);
240 static void arm_relayout_function (tree);
241 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
242 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
243 static bool arm_macro_fusion_p (void);
244 static bool arm_cannot_copy_insn_p (rtx_insn *);
245 static int arm_issue_rate (void);
246 static int arm_first_cycle_multipass_dfa_lookahead (void);
247 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
248 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
249 static bool arm_output_addr_const_extra (FILE *, rtx);
250 static bool arm_allocate_stack_slots_for_args (void);
251 static bool arm_warn_func_return (tree);
252 static tree arm_promoted_type (const_tree t);
253 static tree arm_convert_to_type (tree type, tree expr);
254 static bool arm_scalar_mode_supported_p (machine_mode);
255 static bool arm_frame_pointer_required (void);
256 static bool arm_can_eliminate (const int, const int);
257 static void arm_asm_trampoline_template (FILE *);
258 static void arm_trampoline_init (rtx, tree, rtx);
259 static rtx arm_trampoline_adjust_address (rtx);
260 static rtx arm_pic_static_addr (rtx orig, rtx reg);
261 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
262 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
263 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
264 static bool arm_array_mode_supported_p (machine_mode,
265 unsigned HOST_WIDE_INT);
266 static machine_mode arm_preferred_simd_mode (machine_mode);
267 static bool arm_class_likely_spilled_p (reg_class_t);
268 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
269 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
270 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
271 const_tree type,
272 int misalignment,
273 bool is_packed);
274 static void arm_conditional_register_usage (void);
275 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
276 static unsigned int arm_autovectorize_vector_sizes (void);
277 static int arm_default_branch_cost (bool, bool);
278 static int arm_cortex_a5_branch_cost (bool, bool);
279 static int arm_cortex_m_branch_cost (bool, bool);
280 static int arm_cortex_m7_branch_cost (bool, bool);
282 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
283 const unsigned char *sel);
285 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
287 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
288 tree vectype,
289 int misalign ATTRIBUTE_UNUSED);
290 static unsigned arm_add_stmt_cost (void *data, int count,
291 enum vect_cost_for_stmt kind,
292 struct _stmt_vec_info *stmt_info,
293 int misalign,
294 enum vect_cost_model_location where);
296 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
297 bool op0_preserve_value);
298 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
300 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
301 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
302 const_tree);
303 static section *arm_function_section (tree, enum node_frequency, bool, bool);
304 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
305 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
306 int reloc);
308 /* Table of machine attributes. */
309 static const struct attribute_spec arm_attribute_table[] =
311 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
312 affects_type_identity } */
313 /* Function calls made to this symbol must be done indirectly, because
314 it may lie outside of the 26 bit addressing range of a normal function
315 call. */
316 { "long_call", 0, 0, false, true, true, NULL, false },
317 /* Whereas these functions are always known to reside within the 26 bit
318 addressing range. */
319 { "short_call", 0, 0, false, true, true, NULL, false },
320 /* Specify the procedure call conventions for a function. */
321 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
322 false },
323 /* Interrupt Service Routines have special prologue and epilogue requirements. */
324 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
325 false },
326 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
327 false },
328 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
329 false },
330 #ifdef ARM_PE
331 /* ARM/PE has three new attributes:
332 interfacearm - ?
333 dllexport - for exporting a function/variable that will live in a dll
334 dllimport - for importing a function/variable from a dll
336 Microsoft allows multiple declspecs in one __declspec, separating
337 them with spaces. We do NOT support this. Instead, use __declspec
338 multiple times.
340 { "dllimport", 0, 0, true, false, false, NULL, false },
341 { "dllexport", 0, 0, true, false, false, NULL, false },
342 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
343 false },
344 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
345 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
346 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
347 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
348 false },
349 #endif
350 { NULL, 0, 0, false, false, false, NULL, false }
353 /* Initialize the GCC target structure. */
354 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
355 #undef TARGET_MERGE_DECL_ATTRIBUTES
356 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
357 #endif
359 #undef TARGET_LEGITIMIZE_ADDRESS
360 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
362 #undef TARGET_ATTRIBUTE_TABLE
363 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
365 #undef TARGET_INSERT_ATTRIBUTES
366 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
368 #undef TARGET_ASM_FILE_START
369 #define TARGET_ASM_FILE_START arm_file_start
370 #undef TARGET_ASM_FILE_END
371 #define TARGET_ASM_FILE_END arm_file_end
373 #undef TARGET_ASM_ALIGNED_SI_OP
374 #define TARGET_ASM_ALIGNED_SI_OP NULL
375 #undef TARGET_ASM_INTEGER
376 #define TARGET_ASM_INTEGER arm_assemble_integer
378 #undef TARGET_PRINT_OPERAND
379 #define TARGET_PRINT_OPERAND arm_print_operand
380 #undef TARGET_PRINT_OPERAND_ADDRESS
381 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
382 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
383 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
385 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
386 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
388 #undef TARGET_ASM_FUNCTION_PROLOGUE
389 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
391 #undef TARGET_ASM_FUNCTION_EPILOGUE
392 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
394 #undef TARGET_CAN_INLINE_P
395 #define TARGET_CAN_INLINE_P arm_can_inline_p
397 #undef TARGET_RELAYOUT_FUNCTION
398 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
400 #undef TARGET_OPTION_OVERRIDE
401 #define TARGET_OPTION_OVERRIDE arm_option_override
403 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
404 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
406 #undef TARGET_OPTION_PRINT
407 #define TARGET_OPTION_PRINT arm_option_print
409 #undef TARGET_COMP_TYPE_ATTRIBUTES
410 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
412 #undef TARGET_SCHED_MACRO_FUSION_P
413 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
415 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
416 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
418 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
419 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
421 #undef TARGET_SCHED_ADJUST_COST
422 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
424 #undef TARGET_SET_CURRENT_FUNCTION
425 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
427 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
428 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
430 #undef TARGET_SCHED_REORDER
431 #define TARGET_SCHED_REORDER arm_sched_reorder
433 #undef TARGET_REGISTER_MOVE_COST
434 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
436 #undef TARGET_MEMORY_MOVE_COST
437 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
439 #undef TARGET_ENCODE_SECTION_INFO
440 #ifdef ARM_PE
441 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
442 #else
443 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
444 #endif
446 #undef TARGET_STRIP_NAME_ENCODING
447 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
449 #undef TARGET_ASM_INTERNAL_LABEL
450 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
452 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
453 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
455 #undef TARGET_FUNCTION_VALUE
456 #define TARGET_FUNCTION_VALUE arm_function_value
458 #undef TARGET_LIBCALL_VALUE
459 #define TARGET_LIBCALL_VALUE arm_libcall_value
461 #undef TARGET_FUNCTION_VALUE_REGNO_P
462 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
464 #undef TARGET_ASM_OUTPUT_MI_THUNK
465 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
466 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
467 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
469 #undef TARGET_RTX_COSTS
470 #define TARGET_RTX_COSTS arm_rtx_costs
471 #undef TARGET_ADDRESS_COST
472 #define TARGET_ADDRESS_COST arm_address_cost
474 #undef TARGET_SHIFT_TRUNCATION_MASK
475 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
476 #undef TARGET_VECTOR_MODE_SUPPORTED_P
477 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
478 #undef TARGET_ARRAY_MODE_SUPPORTED_P
479 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
480 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
481 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
482 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
483 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
484 arm_autovectorize_vector_sizes
486 #undef TARGET_MACHINE_DEPENDENT_REORG
487 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
489 #undef TARGET_INIT_BUILTINS
490 #define TARGET_INIT_BUILTINS arm_init_builtins
491 #undef TARGET_EXPAND_BUILTIN
492 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
493 #undef TARGET_BUILTIN_DECL
494 #define TARGET_BUILTIN_DECL arm_builtin_decl
496 #undef TARGET_INIT_LIBFUNCS
497 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
499 #undef TARGET_PROMOTE_FUNCTION_MODE
500 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
501 #undef TARGET_PROMOTE_PROTOTYPES
502 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
503 #undef TARGET_PASS_BY_REFERENCE
504 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
505 #undef TARGET_ARG_PARTIAL_BYTES
506 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
507 #undef TARGET_FUNCTION_ARG
508 #define TARGET_FUNCTION_ARG arm_function_arg
509 #undef TARGET_FUNCTION_ARG_ADVANCE
510 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
511 #undef TARGET_FUNCTION_ARG_BOUNDARY
512 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
514 #undef TARGET_SETUP_INCOMING_VARARGS
515 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
517 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
518 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
520 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
521 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
522 #undef TARGET_TRAMPOLINE_INIT
523 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
524 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
525 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
527 #undef TARGET_WARN_FUNC_RETURN
528 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
530 #undef TARGET_DEFAULT_SHORT_ENUMS
531 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
533 #undef TARGET_ALIGN_ANON_BITFIELD
534 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
536 #undef TARGET_NARROW_VOLATILE_BITFIELD
537 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
539 #undef TARGET_CXX_GUARD_TYPE
540 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
542 #undef TARGET_CXX_GUARD_MASK_BIT
543 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
545 #undef TARGET_CXX_GET_COOKIE_SIZE
546 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
548 #undef TARGET_CXX_COOKIE_HAS_SIZE
549 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
551 #undef TARGET_CXX_CDTOR_RETURNS_THIS
552 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
554 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
555 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
557 #undef TARGET_CXX_USE_AEABI_ATEXIT
558 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
560 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
561 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
562 arm_cxx_determine_class_data_visibility
564 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
565 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
567 #undef TARGET_RETURN_IN_MSB
568 #define TARGET_RETURN_IN_MSB arm_return_in_msb
570 #undef TARGET_RETURN_IN_MEMORY
571 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
573 #undef TARGET_MUST_PASS_IN_STACK
574 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
576 #if ARM_UNWIND_INFO
577 #undef TARGET_ASM_UNWIND_EMIT
578 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
580 /* EABI unwinding tables use a different format for the typeinfo tables. */
581 #undef TARGET_ASM_TTYPE
582 #define TARGET_ASM_TTYPE arm_output_ttype
584 #undef TARGET_ARM_EABI_UNWINDER
585 #define TARGET_ARM_EABI_UNWINDER true
587 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
588 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
590 #undef TARGET_ASM_INIT_SECTIONS
591 #endif /* ARM_UNWIND_INFO */
592 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
594 #undef TARGET_DWARF_REGISTER_SPAN
595 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
597 #undef TARGET_CANNOT_COPY_INSN_P
598 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
600 #ifdef HAVE_AS_TLS
601 #undef TARGET_HAVE_TLS
602 #define TARGET_HAVE_TLS true
603 #endif
605 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
606 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
608 #undef TARGET_LEGITIMATE_CONSTANT_P
609 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
611 #undef TARGET_CANNOT_FORCE_CONST_MEM
612 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
614 #undef TARGET_MAX_ANCHOR_OFFSET
615 #define TARGET_MAX_ANCHOR_OFFSET 4095
617 /* The minimum is set such that the total size of the block
618 for a particular anchor is -4088 + 1 + 4095 bytes, which is
619 divisible by eight, ensuring natural spacing of anchors. */
620 #undef TARGET_MIN_ANCHOR_OFFSET
621 #define TARGET_MIN_ANCHOR_OFFSET -4088
623 #undef TARGET_SCHED_ISSUE_RATE
624 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
626 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
627 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
628 arm_first_cycle_multipass_dfa_lookahead
630 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
631 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
632 arm_first_cycle_multipass_dfa_lookahead_guard
634 #undef TARGET_MANGLE_TYPE
635 #define TARGET_MANGLE_TYPE arm_mangle_type
637 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
638 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
640 #undef TARGET_BUILD_BUILTIN_VA_LIST
641 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
642 #undef TARGET_EXPAND_BUILTIN_VA_START
643 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
644 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
645 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
647 #ifdef HAVE_AS_TLS
648 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
649 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
650 #endif
652 #undef TARGET_LEGITIMATE_ADDRESS_P
653 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
655 #undef TARGET_PREFERRED_RELOAD_CLASS
656 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
658 #undef TARGET_PROMOTED_TYPE
659 #define TARGET_PROMOTED_TYPE arm_promoted_type
661 #undef TARGET_CONVERT_TO_TYPE
662 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
664 #undef TARGET_SCALAR_MODE_SUPPORTED_P
665 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
667 #undef TARGET_FRAME_POINTER_REQUIRED
668 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
670 #undef TARGET_CAN_ELIMINATE
671 #define TARGET_CAN_ELIMINATE arm_can_eliminate
673 #undef TARGET_CONDITIONAL_REGISTER_USAGE
674 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
676 #undef TARGET_CLASS_LIKELY_SPILLED_P
677 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
679 #undef TARGET_VECTORIZE_BUILTINS
680 #define TARGET_VECTORIZE_BUILTINS
682 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
683 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
684 arm_builtin_vectorized_function
686 #undef TARGET_VECTOR_ALIGNMENT
687 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
689 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
690 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
691 arm_vector_alignment_reachable
693 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
694 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
695 arm_builtin_support_vector_misalignment
697 #undef TARGET_PREFERRED_RENAME_CLASS
698 #define TARGET_PREFERRED_RENAME_CLASS \
699 arm_preferred_rename_class
701 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
702 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
703 arm_vectorize_vec_perm_const_ok
705 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
706 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
707 arm_builtin_vectorization_cost
708 #undef TARGET_VECTORIZE_ADD_STMT_COST
709 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
711 #undef TARGET_CANONICALIZE_COMPARISON
712 #define TARGET_CANONICALIZE_COMPARISON \
713 arm_canonicalize_comparison
715 #undef TARGET_ASAN_SHADOW_OFFSET
716 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
718 #undef MAX_INSN_PER_IT_BLOCK
719 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
721 #undef TARGET_CAN_USE_DOLOOP_P
722 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
724 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
725 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
727 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
728 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
730 #undef TARGET_SCHED_FUSION_PRIORITY
731 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
733 #undef TARGET_ASM_FUNCTION_SECTION
734 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
736 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
737 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
739 #undef TARGET_SECTION_TYPE_FLAGS
740 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
742 struct gcc_target targetm = TARGET_INITIALIZER;
744 /* Obstack for minipool constant handling. */
745 static struct obstack minipool_obstack;
746 static char * minipool_startobj;
748 /* The maximum number of insns skipped which
749 will be conditionalised if possible. */
750 static int max_insns_skipped = 5;
752 extern FILE * asm_out_file;
754 /* True if we are currently building a constant table. */
755 int making_const_table;
757 /* The processor for which instructions should be scheduled. */
758 enum processor_type arm_tune = arm_none;
760 /* The current tuning set. */
761 const struct tune_params *current_tune;
763 /* Which floating point hardware to schedule for. */
764 int arm_fpu_attr;
766 /* Used for Thumb call_via trampolines. */
767 rtx thumb_call_via_label[14];
768 static int thumb_call_reg_needed;
770 /* The bits in this mask specify which
771 instructions we are allowed to generate. */
772 arm_feature_set insn_flags = ARM_FSET_EMPTY;
774 /* The bits in this mask specify which instruction scheduling options should
775 be used. */
776 arm_feature_set tune_flags = ARM_FSET_EMPTY;
778 /* The highest ARM architecture version supported by the
779 target. */
780 enum base_architecture arm_base_arch = BASE_ARCH_0;
782 /* The following are used in the arm.md file as equivalents to bits
783 in the above two flag variables. */
785 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
786 int arm_arch3m = 0;
788 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
789 int arm_arch4 = 0;
791 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
792 int arm_arch4t = 0;
794 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
795 int arm_arch5 = 0;
797 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
798 int arm_arch5e = 0;
800 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
801 int arm_arch6 = 0;
803 /* Nonzero if this chip supports the ARM 6K extensions. */
804 int arm_arch6k = 0;
806 /* Nonzero if this chip supports the ARM 6KZ extensions. */
807 int arm_arch6kz = 0;
809 /* Nonzero if instructions present in ARMv6-M can be used. */
810 int arm_arch6m = 0;
812 /* Nonzero if this chip supports the ARM 7 extensions. */
813 int arm_arch7 = 0;
815 /* Nonzero if instructions not present in the 'M' profile can be used. */
816 int arm_arch_notm = 0;
818 /* Nonzero if instructions present in ARMv7E-M can be used. */
819 int arm_arch7em = 0;
821 /* Nonzero if instructions present in ARMv8 can be used. */
822 int arm_arch8 = 0;
824 /* Nonzero if this chip supports the ARMv8.1 extensions. */
825 int arm_arch8_1 = 0;
827 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
828 int arm_arch8_2 = 0;
830 /* Nonzero if this chip supports the FP16 instructions extension of ARM
831 Architecture 8.2. */
832 int arm_fp16_inst = 0;
834 /* Nonzero if this chip can benefit from load scheduling. */
835 int arm_ld_sched = 0;
837 /* Nonzero if this chip is a StrongARM. */
838 int arm_tune_strongarm = 0;
840 /* Nonzero if this chip supports Intel Wireless MMX technology. */
841 int arm_arch_iwmmxt = 0;
843 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
844 int arm_arch_iwmmxt2 = 0;
846 /* Nonzero if this chip is an XScale. */
847 int arm_arch_xscale = 0;
849 /* Nonzero if tuning for XScale */
850 int arm_tune_xscale = 0;
852 /* Nonzero if we want to tune for stores that access the write-buffer.
853 This typically means an ARM6 or ARM7 with MMU or MPU. */
854 int arm_tune_wbuf = 0;
856 /* Nonzero if tuning for Cortex-A9. */
857 int arm_tune_cortex_a9 = 0;
859 /* Nonzero if we should define __THUMB_INTERWORK__ in the
860 preprocessor.
861 XXX This is a bit of a hack, it's intended to help work around
862 problems in GLD which doesn't understand that armv5t code is
863 interworking clean. */
864 int arm_cpp_interwork = 0;
866 /* Nonzero if chip supports Thumb 1. */
867 int arm_arch_thumb1;
869 /* Nonzero if chip supports Thumb 2. */
870 int arm_arch_thumb2;
872 /* Nonzero if chip supports integer division instruction. */
873 int arm_arch_arm_hwdiv;
874 int arm_arch_thumb_hwdiv;
876 /* Nonzero if chip disallows volatile memory access in IT block. */
877 int arm_arch_no_volatile_ce;
879 /* Nonzero if we should use Neon to handle 64-bits operations rather
880 than core registers. */
881 int prefer_neon_for_64bits = 0;
883 /* Nonzero if we shouldn't use literal pools. */
884 bool arm_disable_literal_pool = false;
886 /* The register number to be used for the PIC offset register. */
887 unsigned arm_pic_register = INVALID_REGNUM;
889 enum arm_pcs arm_pcs_default;
891 /* For an explanation of these variables, see final_prescan_insn below. */
892 int arm_ccfsm_state;
893 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
894 enum arm_cond_code arm_current_cc;
896 rtx arm_target_insn;
897 int arm_target_label;
898 /* The number of conditionally executed insns, including the current insn. */
899 int arm_condexec_count = 0;
900 /* A bitmask specifying the patterns for the IT block.
901 Zero means do not output an IT block before this insn. */
902 int arm_condexec_mask = 0;
903 /* The number of bits used in arm_condexec_mask. */
904 int arm_condexec_masklen = 0;
906 /* Nonzero if chip supports the ARMv8 CRC instructions. */
907 int arm_arch_crc = 0;
909 /* Nonzero if the core has a very small, high-latency, multiply unit. */
910 int arm_m_profile_small_mul = 0;
912 /* The condition codes of the ARM, and the inverse function. */
913 static const char * const arm_condition_codes[] =
915 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
916 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
919 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
920 int arm_regs_in_sequence[] =
922 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
925 #define ARM_LSL_NAME "lsl"
926 #define streq(string1, string2) (strcmp (string1, string2) == 0)
928 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
929 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
930 | (1 << PIC_OFFSET_TABLE_REGNUM)))
932 /* Initialization code. */
934 struct processors
936 const char *const name;
937 enum processor_type core;
938 const char *arch;
939 enum base_architecture base_arch;
940 const arm_feature_set flags;
941 const struct tune_params *const tune;
945 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
946 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
948 num_slots, \
949 l1_size, \
950 l1_line_size \
953 /* arm generic vectorizer costs. */
954 static const
955 struct cpu_vec_costs arm_default_vec_cost = {
956 1, /* scalar_stmt_cost. */
957 1, /* scalar load_cost. */
958 1, /* scalar_store_cost. */
959 1, /* vec_stmt_cost. */
960 1, /* vec_to_scalar_cost. */
961 1, /* scalar_to_vec_cost. */
962 1, /* vec_align_load_cost. */
963 1, /* vec_unalign_load_cost. */
964 1, /* vec_unalign_store_cost. */
965 1, /* vec_store_cost. */
966 3, /* cond_taken_branch_cost. */
967 1, /* cond_not_taken_branch_cost. */
970 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
971 #include "aarch-cost-tables.h"
975 const struct cpu_cost_table cortexa9_extra_costs =
977 /* ALU */
979 0, /* arith. */
980 0, /* logical. */
981 0, /* shift. */
982 COSTS_N_INSNS (1), /* shift_reg. */
983 COSTS_N_INSNS (1), /* arith_shift. */
984 COSTS_N_INSNS (2), /* arith_shift_reg. */
985 0, /* log_shift. */
986 COSTS_N_INSNS (1), /* log_shift_reg. */
987 COSTS_N_INSNS (1), /* extend. */
988 COSTS_N_INSNS (2), /* extend_arith. */
989 COSTS_N_INSNS (1), /* bfi. */
990 COSTS_N_INSNS (1), /* bfx. */
991 0, /* clz. */
992 0, /* rev. */
993 0, /* non_exec. */
994 true /* non_exec_costs_exec. */
997 /* MULT SImode */
999 COSTS_N_INSNS (3), /* simple. */
1000 COSTS_N_INSNS (3), /* flag_setting. */
1001 COSTS_N_INSNS (2), /* extend. */
1002 COSTS_N_INSNS (3), /* add. */
1003 COSTS_N_INSNS (2), /* extend_add. */
1004 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1006 /* MULT DImode */
1008 0, /* simple (N/A). */
1009 0, /* flag_setting (N/A). */
1010 COSTS_N_INSNS (4), /* extend. */
1011 0, /* add (N/A). */
1012 COSTS_N_INSNS (4), /* extend_add. */
1013 0 /* idiv (N/A). */
1016 /* LD/ST */
1018 COSTS_N_INSNS (2), /* load. */
1019 COSTS_N_INSNS (2), /* load_sign_extend. */
1020 COSTS_N_INSNS (2), /* ldrd. */
1021 COSTS_N_INSNS (2), /* ldm_1st. */
1022 1, /* ldm_regs_per_insn_1st. */
1023 2, /* ldm_regs_per_insn_subsequent. */
1024 COSTS_N_INSNS (5), /* loadf. */
1025 COSTS_N_INSNS (5), /* loadd. */
1026 COSTS_N_INSNS (1), /* load_unaligned. */
1027 COSTS_N_INSNS (2), /* store. */
1028 COSTS_N_INSNS (2), /* strd. */
1029 COSTS_N_INSNS (2), /* stm_1st. */
1030 1, /* stm_regs_per_insn_1st. */
1031 2, /* stm_regs_per_insn_subsequent. */
1032 COSTS_N_INSNS (1), /* storef. */
1033 COSTS_N_INSNS (1), /* stored. */
1034 COSTS_N_INSNS (1), /* store_unaligned. */
1035 COSTS_N_INSNS (1), /* loadv. */
1036 COSTS_N_INSNS (1) /* storev. */
1039 /* FP SFmode */
1041 COSTS_N_INSNS (14), /* div. */
1042 COSTS_N_INSNS (4), /* mult. */
1043 COSTS_N_INSNS (7), /* mult_addsub. */
1044 COSTS_N_INSNS (30), /* fma. */
1045 COSTS_N_INSNS (3), /* addsub. */
1046 COSTS_N_INSNS (1), /* fpconst. */
1047 COSTS_N_INSNS (1), /* neg. */
1048 COSTS_N_INSNS (3), /* compare. */
1049 COSTS_N_INSNS (3), /* widen. */
1050 COSTS_N_INSNS (3), /* narrow. */
1051 COSTS_N_INSNS (3), /* toint. */
1052 COSTS_N_INSNS (3), /* fromint. */
1053 COSTS_N_INSNS (3) /* roundint. */
1055 /* FP DFmode */
1057 COSTS_N_INSNS (24), /* div. */
1058 COSTS_N_INSNS (5), /* mult. */
1059 COSTS_N_INSNS (8), /* mult_addsub. */
1060 COSTS_N_INSNS (30), /* fma. */
1061 COSTS_N_INSNS (3), /* addsub. */
1062 COSTS_N_INSNS (1), /* fpconst. */
1063 COSTS_N_INSNS (1), /* neg. */
1064 COSTS_N_INSNS (3), /* compare. */
1065 COSTS_N_INSNS (3), /* widen. */
1066 COSTS_N_INSNS (3), /* narrow. */
1067 COSTS_N_INSNS (3), /* toint. */
1068 COSTS_N_INSNS (3), /* fromint. */
1069 COSTS_N_INSNS (3) /* roundint. */
1072 /* Vector */
1074 COSTS_N_INSNS (1) /* alu. */
1078 const struct cpu_cost_table cortexa8_extra_costs =
1080 /* ALU */
1082 0, /* arith. */
1083 0, /* logical. */
1084 COSTS_N_INSNS (1), /* shift. */
1085 0, /* shift_reg. */
1086 COSTS_N_INSNS (1), /* arith_shift. */
1087 0, /* arith_shift_reg. */
1088 COSTS_N_INSNS (1), /* log_shift. */
1089 0, /* log_shift_reg. */
1090 0, /* extend. */
1091 0, /* extend_arith. */
1092 0, /* bfi. */
1093 0, /* bfx. */
1094 0, /* clz. */
1095 0, /* rev. */
1096 0, /* non_exec. */
1097 true /* non_exec_costs_exec. */
1100 /* MULT SImode */
1102 COSTS_N_INSNS (1), /* simple. */
1103 COSTS_N_INSNS (1), /* flag_setting. */
1104 COSTS_N_INSNS (1), /* extend. */
1105 COSTS_N_INSNS (1), /* add. */
1106 COSTS_N_INSNS (1), /* extend_add. */
1107 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1109 /* MULT DImode */
1111 0, /* simple (N/A). */
1112 0, /* flag_setting (N/A). */
1113 COSTS_N_INSNS (2), /* extend. */
1114 0, /* add (N/A). */
1115 COSTS_N_INSNS (2), /* extend_add. */
1116 0 /* idiv (N/A). */
1119 /* LD/ST */
1121 COSTS_N_INSNS (1), /* load. */
1122 COSTS_N_INSNS (1), /* load_sign_extend. */
1123 COSTS_N_INSNS (1), /* ldrd. */
1124 COSTS_N_INSNS (1), /* ldm_1st. */
1125 1, /* ldm_regs_per_insn_1st. */
1126 2, /* ldm_regs_per_insn_subsequent. */
1127 COSTS_N_INSNS (1), /* loadf. */
1128 COSTS_N_INSNS (1), /* loadd. */
1129 COSTS_N_INSNS (1), /* load_unaligned. */
1130 COSTS_N_INSNS (1), /* store. */
1131 COSTS_N_INSNS (1), /* strd. */
1132 COSTS_N_INSNS (1), /* stm_1st. */
1133 1, /* stm_regs_per_insn_1st. */
1134 2, /* stm_regs_per_insn_subsequent. */
1135 COSTS_N_INSNS (1), /* storef. */
1136 COSTS_N_INSNS (1), /* stored. */
1137 COSTS_N_INSNS (1), /* store_unaligned. */
1138 COSTS_N_INSNS (1), /* loadv. */
1139 COSTS_N_INSNS (1) /* storev. */
1142 /* FP SFmode */
1144 COSTS_N_INSNS (36), /* div. */
1145 COSTS_N_INSNS (11), /* mult. */
1146 COSTS_N_INSNS (20), /* mult_addsub. */
1147 COSTS_N_INSNS (30), /* fma. */
1148 COSTS_N_INSNS (9), /* addsub. */
1149 COSTS_N_INSNS (3), /* fpconst. */
1150 COSTS_N_INSNS (3), /* neg. */
1151 COSTS_N_INSNS (6), /* compare. */
1152 COSTS_N_INSNS (4), /* widen. */
1153 COSTS_N_INSNS (4), /* narrow. */
1154 COSTS_N_INSNS (8), /* toint. */
1155 COSTS_N_INSNS (8), /* fromint. */
1156 COSTS_N_INSNS (8) /* roundint. */
1158 /* FP DFmode */
1160 COSTS_N_INSNS (64), /* div. */
1161 COSTS_N_INSNS (16), /* mult. */
1162 COSTS_N_INSNS (25), /* mult_addsub. */
1163 COSTS_N_INSNS (30), /* fma. */
1164 COSTS_N_INSNS (9), /* addsub. */
1165 COSTS_N_INSNS (3), /* fpconst. */
1166 COSTS_N_INSNS (3), /* neg. */
1167 COSTS_N_INSNS (6), /* compare. */
1168 COSTS_N_INSNS (6), /* widen. */
1169 COSTS_N_INSNS (6), /* narrow. */
1170 COSTS_N_INSNS (8), /* toint. */
1171 COSTS_N_INSNS (8), /* fromint. */
1172 COSTS_N_INSNS (8) /* roundint. */
1175 /* Vector */
1177 COSTS_N_INSNS (1) /* alu. */
1181 const struct cpu_cost_table cortexa5_extra_costs =
1183 /* ALU */
1185 0, /* arith. */
1186 0, /* logical. */
1187 COSTS_N_INSNS (1), /* shift. */
1188 COSTS_N_INSNS (1), /* shift_reg. */
1189 COSTS_N_INSNS (1), /* arith_shift. */
1190 COSTS_N_INSNS (1), /* arith_shift_reg. */
1191 COSTS_N_INSNS (1), /* log_shift. */
1192 COSTS_N_INSNS (1), /* log_shift_reg. */
1193 COSTS_N_INSNS (1), /* extend. */
1194 COSTS_N_INSNS (1), /* extend_arith. */
1195 COSTS_N_INSNS (1), /* bfi. */
1196 COSTS_N_INSNS (1), /* bfx. */
1197 COSTS_N_INSNS (1), /* clz. */
1198 COSTS_N_INSNS (1), /* rev. */
1199 0, /* non_exec. */
1200 true /* non_exec_costs_exec. */
1204 /* MULT SImode */
1206 0, /* simple. */
1207 COSTS_N_INSNS (1), /* flag_setting. */
1208 COSTS_N_INSNS (1), /* extend. */
1209 COSTS_N_INSNS (1), /* add. */
1210 COSTS_N_INSNS (1), /* extend_add. */
1211 COSTS_N_INSNS (7) /* idiv. */
1213 /* MULT DImode */
1215 0, /* simple (N/A). */
1216 0, /* flag_setting (N/A). */
1217 COSTS_N_INSNS (1), /* extend. */
1218 0, /* add. */
1219 COSTS_N_INSNS (2), /* extend_add. */
1220 0 /* idiv (N/A). */
1223 /* LD/ST */
1225 COSTS_N_INSNS (1), /* load. */
1226 COSTS_N_INSNS (1), /* load_sign_extend. */
1227 COSTS_N_INSNS (6), /* ldrd. */
1228 COSTS_N_INSNS (1), /* ldm_1st. */
1229 1, /* ldm_regs_per_insn_1st. */
1230 2, /* ldm_regs_per_insn_subsequent. */
1231 COSTS_N_INSNS (2), /* loadf. */
1232 COSTS_N_INSNS (4), /* loadd. */
1233 COSTS_N_INSNS (1), /* load_unaligned. */
1234 COSTS_N_INSNS (1), /* store. */
1235 COSTS_N_INSNS (3), /* strd. */
1236 COSTS_N_INSNS (1), /* stm_1st. */
1237 1, /* stm_regs_per_insn_1st. */
1238 2, /* stm_regs_per_insn_subsequent. */
1239 COSTS_N_INSNS (2), /* storef. */
1240 COSTS_N_INSNS (2), /* stored. */
1241 COSTS_N_INSNS (1), /* store_unaligned. */
1242 COSTS_N_INSNS (1), /* loadv. */
1243 COSTS_N_INSNS (1) /* storev. */
1246 /* FP SFmode */
1248 COSTS_N_INSNS (15), /* div. */
1249 COSTS_N_INSNS (3), /* mult. */
1250 COSTS_N_INSNS (7), /* mult_addsub. */
1251 COSTS_N_INSNS (7), /* fma. */
1252 COSTS_N_INSNS (3), /* addsub. */
1253 COSTS_N_INSNS (3), /* fpconst. */
1254 COSTS_N_INSNS (3), /* neg. */
1255 COSTS_N_INSNS (3), /* compare. */
1256 COSTS_N_INSNS (3), /* widen. */
1257 COSTS_N_INSNS (3), /* narrow. */
1258 COSTS_N_INSNS (3), /* toint. */
1259 COSTS_N_INSNS (3), /* fromint. */
1260 COSTS_N_INSNS (3) /* roundint. */
1262 /* FP DFmode */
1264 COSTS_N_INSNS (30), /* div. */
1265 COSTS_N_INSNS (6), /* mult. */
1266 COSTS_N_INSNS (10), /* mult_addsub. */
1267 COSTS_N_INSNS (7), /* fma. */
1268 COSTS_N_INSNS (3), /* addsub. */
1269 COSTS_N_INSNS (3), /* fpconst. */
1270 COSTS_N_INSNS (3), /* neg. */
1271 COSTS_N_INSNS (3), /* compare. */
1272 COSTS_N_INSNS (3), /* widen. */
1273 COSTS_N_INSNS (3), /* narrow. */
1274 COSTS_N_INSNS (3), /* toint. */
1275 COSTS_N_INSNS (3), /* fromint. */
1276 COSTS_N_INSNS (3) /* roundint. */
1279 /* Vector */
1281 COSTS_N_INSNS (1) /* alu. */
1286 const struct cpu_cost_table cortexa7_extra_costs =
1288 /* ALU */
1290 0, /* arith. */
1291 0, /* logical. */
1292 COSTS_N_INSNS (1), /* shift. */
1293 COSTS_N_INSNS (1), /* shift_reg. */
1294 COSTS_N_INSNS (1), /* arith_shift. */
1295 COSTS_N_INSNS (1), /* arith_shift_reg. */
1296 COSTS_N_INSNS (1), /* log_shift. */
1297 COSTS_N_INSNS (1), /* log_shift_reg. */
1298 COSTS_N_INSNS (1), /* extend. */
1299 COSTS_N_INSNS (1), /* extend_arith. */
1300 COSTS_N_INSNS (1), /* bfi. */
1301 COSTS_N_INSNS (1), /* bfx. */
1302 COSTS_N_INSNS (1), /* clz. */
1303 COSTS_N_INSNS (1), /* rev. */
1304 0, /* non_exec. */
1305 true /* non_exec_costs_exec. */
1309 /* MULT SImode */
1311 0, /* simple. */
1312 COSTS_N_INSNS (1), /* flag_setting. */
1313 COSTS_N_INSNS (1), /* extend. */
1314 COSTS_N_INSNS (1), /* add. */
1315 COSTS_N_INSNS (1), /* extend_add. */
1316 COSTS_N_INSNS (7) /* idiv. */
1318 /* MULT DImode */
1320 0, /* simple (N/A). */
1321 0, /* flag_setting (N/A). */
1322 COSTS_N_INSNS (1), /* extend. */
1323 0, /* add. */
1324 COSTS_N_INSNS (2), /* extend_add. */
1325 0 /* idiv (N/A). */
1328 /* LD/ST */
1330 COSTS_N_INSNS (1), /* load. */
1331 COSTS_N_INSNS (1), /* load_sign_extend. */
1332 COSTS_N_INSNS (3), /* ldrd. */
1333 COSTS_N_INSNS (1), /* ldm_1st. */
1334 1, /* ldm_regs_per_insn_1st. */
1335 2, /* ldm_regs_per_insn_subsequent. */
1336 COSTS_N_INSNS (2), /* loadf. */
1337 COSTS_N_INSNS (2), /* loadd. */
1338 COSTS_N_INSNS (1), /* load_unaligned. */
1339 COSTS_N_INSNS (1), /* store. */
1340 COSTS_N_INSNS (3), /* strd. */
1341 COSTS_N_INSNS (1), /* stm_1st. */
1342 1, /* stm_regs_per_insn_1st. */
1343 2, /* stm_regs_per_insn_subsequent. */
1344 COSTS_N_INSNS (2), /* storef. */
1345 COSTS_N_INSNS (2), /* stored. */
1346 COSTS_N_INSNS (1), /* store_unaligned. */
1347 COSTS_N_INSNS (1), /* loadv. */
1348 COSTS_N_INSNS (1) /* storev. */
1351 /* FP SFmode */
1353 COSTS_N_INSNS (15), /* div. */
1354 COSTS_N_INSNS (3), /* mult. */
1355 COSTS_N_INSNS (7), /* mult_addsub. */
1356 COSTS_N_INSNS (7), /* fma. */
1357 COSTS_N_INSNS (3), /* addsub. */
1358 COSTS_N_INSNS (3), /* fpconst. */
1359 COSTS_N_INSNS (3), /* neg. */
1360 COSTS_N_INSNS (3), /* compare. */
1361 COSTS_N_INSNS (3), /* widen. */
1362 COSTS_N_INSNS (3), /* narrow. */
1363 COSTS_N_INSNS (3), /* toint. */
1364 COSTS_N_INSNS (3), /* fromint. */
1365 COSTS_N_INSNS (3) /* roundint. */
1367 /* FP DFmode */
1369 COSTS_N_INSNS (30), /* div. */
1370 COSTS_N_INSNS (6), /* mult. */
1371 COSTS_N_INSNS (10), /* mult_addsub. */
1372 COSTS_N_INSNS (7), /* fma. */
1373 COSTS_N_INSNS (3), /* addsub. */
1374 COSTS_N_INSNS (3), /* fpconst. */
1375 COSTS_N_INSNS (3), /* neg. */
1376 COSTS_N_INSNS (3), /* compare. */
1377 COSTS_N_INSNS (3), /* widen. */
1378 COSTS_N_INSNS (3), /* narrow. */
1379 COSTS_N_INSNS (3), /* toint. */
1380 COSTS_N_INSNS (3), /* fromint. */
1381 COSTS_N_INSNS (3) /* roundint. */
1384 /* Vector */
1386 COSTS_N_INSNS (1) /* alu. */
1390 const struct cpu_cost_table cortexa12_extra_costs =
1392 /* ALU */
1394 0, /* arith. */
1395 0, /* logical. */
1396 0, /* shift. */
1397 COSTS_N_INSNS (1), /* shift_reg. */
1398 COSTS_N_INSNS (1), /* arith_shift. */
1399 COSTS_N_INSNS (1), /* arith_shift_reg. */
1400 COSTS_N_INSNS (1), /* log_shift. */
1401 COSTS_N_INSNS (1), /* log_shift_reg. */
1402 0, /* extend. */
1403 COSTS_N_INSNS (1), /* extend_arith. */
1404 0, /* bfi. */
1405 COSTS_N_INSNS (1), /* bfx. */
1406 COSTS_N_INSNS (1), /* clz. */
1407 COSTS_N_INSNS (1), /* rev. */
1408 0, /* non_exec. */
1409 true /* non_exec_costs_exec. */
1411 /* MULT SImode */
1414 COSTS_N_INSNS (2), /* simple. */
1415 COSTS_N_INSNS (3), /* flag_setting. */
1416 COSTS_N_INSNS (2), /* extend. */
1417 COSTS_N_INSNS (3), /* add. */
1418 COSTS_N_INSNS (2), /* extend_add. */
1419 COSTS_N_INSNS (18) /* idiv. */
1421 /* MULT DImode */
1423 0, /* simple (N/A). */
1424 0, /* flag_setting (N/A). */
1425 COSTS_N_INSNS (3), /* extend. */
1426 0, /* add (N/A). */
1427 COSTS_N_INSNS (3), /* extend_add. */
1428 0 /* idiv (N/A). */
1431 /* LD/ST */
1433 COSTS_N_INSNS (3), /* load. */
1434 COSTS_N_INSNS (3), /* load_sign_extend. */
1435 COSTS_N_INSNS (3), /* ldrd. */
1436 COSTS_N_INSNS (3), /* ldm_1st. */
1437 1, /* ldm_regs_per_insn_1st. */
1438 2, /* ldm_regs_per_insn_subsequent. */
1439 COSTS_N_INSNS (3), /* loadf. */
1440 COSTS_N_INSNS (3), /* loadd. */
1441 0, /* load_unaligned. */
1442 0, /* store. */
1443 0, /* strd. */
1444 0, /* stm_1st. */
1445 1, /* stm_regs_per_insn_1st. */
1446 2, /* stm_regs_per_insn_subsequent. */
1447 COSTS_N_INSNS (2), /* storef. */
1448 COSTS_N_INSNS (2), /* stored. */
1449 0, /* store_unaligned. */
1450 COSTS_N_INSNS (1), /* loadv. */
1451 COSTS_N_INSNS (1) /* storev. */
1454 /* FP SFmode */
1456 COSTS_N_INSNS (17), /* div. */
1457 COSTS_N_INSNS (4), /* mult. */
1458 COSTS_N_INSNS (8), /* mult_addsub. */
1459 COSTS_N_INSNS (8), /* fma. */
1460 COSTS_N_INSNS (4), /* addsub. */
1461 COSTS_N_INSNS (2), /* fpconst. */
1462 COSTS_N_INSNS (2), /* neg. */
1463 COSTS_N_INSNS (2), /* compare. */
1464 COSTS_N_INSNS (4), /* widen. */
1465 COSTS_N_INSNS (4), /* narrow. */
1466 COSTS_N_INSNS (4), /* toint. */
1467 COSTS_N_INSNS (4), /* fromint. */
1468 COSTS_N_INSNS (4) /* roundint. */
1470 /* FP DFmode */
1472 COSTS_N_INSNS (31), /* div. */
1473 COSTS_N_INSNS (4), /* mult. */
1474 COSTS_N_INSNS (8), /* mult_addsub. */
1475 COSTS_N_INSNS (8), /* fma. */
1476 COSTS_N_INSNS (4), /* addsub. */
1477 COSTS_N_INSNS (2), /* fpconst. */
1478 COSTS_N_INSNS (2), /* neg. */
1479 COSTS_N_INSNS (2), /* compare. */
1480 COSTS_N_INSNS (4), /* widen. */
1481 COSTS_N_INSNS (4), /* narrow. */
1482 COSTS_N_INSNS (4), /* toint. */
1483 COSTS_N_INSNS (4), /* fromint. */
1484 COSTS_N_INSNS (4) /* roundint. */
1487 /* Vector */
1489 COSTS_N_INSNS (1) /* alu. */
1493 const struct cpu_cost_table cortexa15_extra_costs =
1495 /* ALU */
1497 0, /* arith. */
1498 0, /* logical. */
1499 0, /* shift. */
1500 0, /* shift_reg. */
1501 COSTS_N_INSNS (1), /* arith_shift. */
1502 COSTS_N_INSNS (1), /* arith_shift_reg. */
1503 COSTS_N_INSNS (1), /* log_shift. */
1504 COSTS_N_INSNS (1), /* log_shift_reg. */
1505 0, /* extend. */
1506 COSTS_N_INSNS (1), /* extend_arith. */
1507 COSTS_N_INSNS (1), /* bfi. */
1508 0, /* bfx. */
1509 0, /* clz. */
1510 0, /* rev. */
1511 0, /* non_exec. */
1512 true /* non_exec_costs_exec. */
1514 /* MULT SImode */
1517 COSTS_N_INSNS (2), /* simple. */
1518 COSTS_N_INSNS (3), /* flag_setting. */
1519 COSTS_N_INSNS (2), /* extend. */
1520 COSTS_N_INSNS (2), /* add. */
1521 COSTS_N_INSNS (2), /* extend_add. */
1522 COSTS_N_INSNS (18) /* idiv. */
1524 /* MULT DImode */
1526 0, /* simple (N/A). */
1527 0, /* flag_setting (N/A). */
1528 COSTS_N_INSNS (3), /* extend. */
1529 0, /* add (N/A). */
1530 COSTS_N_INSNS (3), /* extend_add. */
1531 0 /* idiv (N/A). */
1534 /* LD/ST */
1536 COSTS_N_INSNS (3), /* load. */
1537 COSTS_N_INSNS (3), /* load_sign_extend. */
1538 COSTS_N_INSNS (3), /* ldrd. */
1539 COSTS_N_INSNS (4), /* ldm_1st. */
1540 1, /* ldm_regs_per_insn_1st. */
1541 2, /* ldm_regs_per_insn_subsequent. */
1542 COSTS_N_INSNS (4), /* loadf. */
1543 COSTS_N_INSNS (4), /* loadd. */
1544 0, /* load_unaligned. */
1545 0, /* store. */
1546 0, /* strd. */
1547 COSTS_N_INSNS (1), /* stm_1st. */
1548 1, /* stm_regs_per_insn_1st. */
1549 2, /* stm_regs_per_insn_subsequent. */
1550 0, /* storef. */
1551 0, /* stored. */
1552 0, /* store_unaligned. */
1553 COSTS_N_INSNS (1), /* loadv. */
1554 COSTS_N_INSNS (1) /* storev. */
1557 /* FP SFmode */
1559 COSTS_N_INSNS (17), /* div. */
1560 COSTS_N_INSNS (4), /* mult. */
1561 COSTS_N_INSNS (8), /* mult_addsub. */
1562 COSTS_N_INSNS (8), /* fma. */
1563 COSTS_N_INSNS (4), /* addsub. */
1564 COSTS_N_INSNS (2), /* fpconst. */
1565 COSTS_N_INSNS (2), /* neg. */
1566 COSTS_N_INSNS (5), /* compare. */
1567 COSTS_N_INSNS (4), /* widen. */
1568 COSTS_N_INSNS (4), /* narrow. */
1569 COSTS_N_INSNS (4), /* toint. */
1570 COSTS_N_INSNS (4), /* fromint. */
1571 COSTS_N_INSNS (4) /* roundint. */
1573 /* FP DFmode */
1575 COSTS_N_INSNS (31), /* div. */
1576 COSTS_N_INSNS (4), /* mult. */
1577 COSTS_N_INSNS (8), /* mult_addsub. */
1578 COSTS_N_INSNS (8), /* fma. */
1579 COSTS_N_INSNS (4), /* addsub. */
1580 COSTS_N_INSNS (2), /* fpconst. */
1581 COSTS_N_INSNS (2), /* neg. */
1582 COSTS_N_INSNS (2), /* compare. */
1583 COSTS_N_INSNS (4), /* widen. */
1584 COSTS_N_INSNS (4), /* narrow. */
1585 COSTS_N_INSNS (4), /* toint. */
1586 COSTS_N_INSNS (4), /* fromint. */
1587 COSTS_N_INSNS (4) /* roundint. */
1590 /* Vector */
1592 COSTS_N_INSNS (1) /* alu. */
1596 const struct cpu_cost_table v7m_extra_costs =
1598 /* ALU */
1600 0, /* arith. */
1601 0, /* logical. */
1602 0, /* shift. */
1603 0, /* shift_reg. */
1604 0, /* arith_shift. */
1605 COSTS_N_INSNS (1), /* arith_shift_reg. */
1606 0, /* log_shift. */
1607 COSTS_N_INSNS (1), /* log_shift_reg. */
1608 0, /* extend. */
1609 COSTS_N_INSNS (1), /* extend_arith. */
1610 0, /* bfi. */
1611 0, /* bfx. */
1612 0, /* clz. */
1613 0, /* rev. */
1614 COSTS_N_INSNS (1), /* non_exec. */
1615 false /* non_exec_costs_exec. */
1618 /* MULT SImode */
1620 COSTS_N_INSNS (1), /* simple. */
1621 COSTS_N_INSNS (1), /* flag_setting. */
1622 COSTS_N_INSNS (2), /* extend. */
1623 COSTS_N_INSNS (1), /* add. */
1624 COSTS_N_INSNS (3), /* extend_add. */
1625 COSTS_N_INSNS (8) /* idiv. */
1627 /* MULT DImode */
1629 0, /* simple (N/A). */
1630 0, /* flag_setting (N/A). */
1631 COSTS_N_INSNS (2), /* extend. */
1632 0, /* add (N/A). */
1633 COSTS_N_INSNS (3), /* extend_add. */
1634 0 /* idiv (N/A). */
1637 /* LD/ST */
1639 COSTS_N_INSNS (2), /* load. */
1640 0, /* load_sign_extend. */
1641 COSTS_N_INSNS (3), /* ldrd. */
1642 COSTS_N_INSNS (2), /* ldm_1st. */
1643 1, /* ldm_regs_per_insn_1st. */
1644 1, /* ldm_regs_per_insn_subsequent. */
1645 COSTS_N_INSNS (2), /* loadf. */
1646 COSTS_N_INSNS (3), /* loadd. */
1647 COSTS_N_INSNS (1), /* load_unaligned. */
1648 COSTS_N_INSNS (2), /* store. */
1649 COSTS_N_INSNS (3), /* strd. */
1650 COSTS_N_INSNS (2), /* stm_1st. */
1651 1, /* stm_regs_per_insn_1st. */
1652 1, /* stm_regs_per_insn_subsequent. */
1653 COSTS_N_INSNS (2), /* storef. */
1654 COSTS_N_INSNS (3), /* stored. */
1655 COSTS_N_INSNS (1), /* store_unaligned. */
1656 COSTS_N_INSNS (1), /* loadv. */
1657 COSTS_N_INSNS (1) /* storev. */
1660 /* FP SFmode */
1662 COSTS_N_INSNS (7), /* div. */
1663 COSTS_N_INSNS (2), /* mult. */
1664 COSTS_N_INSNS (5), /* mult_addsub. */
1665 COSTS_N_INSNS (3), /* fma. */
1666 COSTS_N_INSNS (1), /* addsub. */
1667 0, /* fpconst. */
1668 0, /* neg. */
1669 0, /* compare. */
1670 0, /* widen. */
1671 0, /* narrow. */
1672 0, /* toint. */
1673 0, /* fromint. */
1674 0 /* roundint. */
1676 /* FP DFmode */
1678 COSTS_N_INSNS (15), /* div. */
1679 COSTS_N_INSNS (5), /* mult. */
1680 COSTS_N_INSNS (7), /* mult_addsub. */
1681 COSTS_N_INSNS (7), /* fma. */
1682 COSTS_N_INSNS (3), /* addsub. */
1683 0, /* fpconst. */
1684 0, /* neg. */
1685 0, /* compare. */
1686 0, /* widen. */
1687 0, /* narrow. */
1688 0, /* toint. */
1689 0, /* fromint. */
1690 0 /* roundint. */
1693 /* Vector */
1695 COSTS_N_INSNS (1) /* alu. */
1699 const struct tune_params arm_slowmul_tune =
1701 arm_slowmul_rtx_costs,
1702 NULL, /* Insn extra costs. */
1703 NULL, /* Sched adj cost. */
1704 arm_default_branch_cost,
1705 &arm_default_vec_cost,
1706 3, /* Constant limit. */
1707 5, /* Max cond insns. */
1708 8, /* Memset max inline. */
1709 1, /* Issue rate. */
1710 ARM_PREFETCH_NOT_BENEFICIAL,
1711 tune_params::PREF_CONST_POOL_TRUE,
1712 tune_params::PREF_LDRD_FALSE,
1713 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1714 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1715 tune_params::DISPARAGE_FLAGS_NEITHER,
1716 tune_params::PREF_NEON_64_FALSE,
1717 tune_params::PREF_NEON_STRINGOPS_FALSE,
1718 tune_params::FUSE_NOTHING,
1719 tune_params::SCHED_AUTOPREF_OFF
1722 const struct tune_params arm_fastmul_tune =
1724 arm_fastmul_rtx_costs,
1725 NULL, /* Insn extra costs. */
1726 NULL, /* Sched adj cost. */
1727 arm_default_branch_cost,
1728 &arm_default_vec_cost,
1729 1, /* Constant limit. */
1730 5, /* Max cond insns. */
1731 8, /* Memset max inline. */
1732 1, /* Issue rate. */
1733 ARM_PREFETCH_NOT_BENEFICIAL,
1734 tune_params::PREF_CONST_POOL_TRUE,
1735 tune_params::PREF_LDRD_FALSE,
1736 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1737 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1738 tune_params::DISPARAGE_FLAGS_NEITHER,
1739 tune_params::PREF_NEON_64_FALSE,
1740 tune_params::PREF_NEON_STRINGOPS_FALSE,
1741 tune_params::FUSE_NOTHING,
1742 tune_params::SCHED_AUTOPREF_OFF
1745 /* StrongARM has early execution of branches, so a sequence that is worth
1746 skipping is shorter. Set max_insns_skipped to a lower value. */
1748 const struct tune_params arm_strongarm_tune =
1750 arm_fastmul_rtx_costs,
1751 NULL, /* Insn extra costs. */
1752 NULL, /* Sched adj cost. */
1753 arm_default_branch_cost,
1754 &arm_default_vec_cost,
1755 1, /* Constant limit. */
1756 3, /* Max cond insns. */
1757 8, /* Memset max inline. */
1758 1, /* Issue rate. */
1759 ARM_PREFETCH_NOT_BENEFICIAL,
1760 tune_params::PREF_CONST_POOL_TRUE,
1761 tune_params::PREF_LDRD_FALSE,
1762 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1763 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1764 tune_params::DISPARAGE_FLAGS_NEITHER,
1765 tune_params::PREF_NEON_64_FALSE,
1766 tune_params::PREF_NEON_STRINGOPS_FALSE,
1767 tune_params::FUSE_NOTHING,
1768 tune_params::SCHED_AUTOPREF_OFF
1771 const struct tune_params arm_xscale_tune =
1773 arm_xscale_rtx_costs,
1774 NULL, /* Insn extra costs. */
1775 xscale_sched_adjust_cost,
1776 arm_default_branch_cost,
1777 &arm_default_vec_cost,
1778 2, /* Constant limit. */
1779 3, /* Max cond insns. */
1780 8, /* Memset max inline. */
1781 1, /* Issue rate. */
1782 ARM_PREFETCH_NOT_BENEFICIAL,
1783 tune_params::PREF_CONST_POOL_TRUE,
1784 tune_params::PREF_LDRD_FALSE,
1785 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1786 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1787 tune_params::DISPARAGE_FLAGS_NEITHER,
1788 tune_params::PREF_NEON_64_FALSE,
1789 tune_params::PREF_NEON_STRINGOPS_FALSE,
1790 tune_params::FUSE_NOTHING,
1791 tune_params::SCHED_AUTOPREF_OFF
1794 const struct tune_params arm_9e_tune =
1796 arm_9e_rtx_costs,
1797 NULL, /* Insn extra costs. */
1798 NULL, /* Sched adj cost. */
1799 arm_default_branch_cost,
1800 &arm_default_vec_cost,
1801 1, /* Constant limit. */
1802 5, /* Max cond insns. */
1803 8, /* Memset max inline. */
1804 1, /* Issue rate. */
1805 ARM_PREFETCH_NOT_BENEFICIAL,
1806 tune_params::PREF_CONST_POOL_TRUE,
1807 tune_params::PREF_LDRD_FALSE,
1808 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1809 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1810 tune_params::DISPARAGE_FLAGS_NEITHER,
1811 tune_params::PREF_NEON_64_FALSE,
1812 tune_params::PREF_NEON_STRINGOPS_FALSE,
1813 tune_params::FUSE_NOTHING,
1814 tune_params::SCHED_AUTOPREF_OFF
1817 const struct tune_params arm_marvell_pj4_tune =
1819 arm_9e_rtx_costs,
1820 NULL, /* Insn extra costs. */
1821 NULL, /* Sched adj cost. */
1822 arm_default_branch_cost,
1823 &arm_default_vec_cost,
1824 1, /* Constant limit. */
1825 5, /* Max cond insns. */
1826 8, /* Memset max inline. */
1827 2, /* Issue rate. */
1828 ARM_PREFETCH_NOT_BENEFICIAL,
1829 tune_params::PREF_CONST_POOL_TRUE,
1830 tune_params::PREF_LDRD_FALSE,
1831 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1832 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1833 tune_params::DISPARAGE_FLAGS_NEITHER,
1834 tune_params::PREF_NEON_64_FALSE,
1835 tune_params::PREF_NEON_STRINGOPS_FALSE,
1836 tune_params::FUSE_NOTHING,
1837 tune_params::SCHED_AUTOPREF_OFF
1840 const struct tune_params arm_v6t2_tune =
1842 arm_9e_rtx_costs,
1843 NULL, /* Insn extra costs. */
1844 NULL, /* Sched adj cost. */
1845 arm_default_branch_cost,
1846 &arm_default_vec_cost,
1847 1, /* Constant limit. */
1848 5, /* Max cond insns. */
1849 8, /* Memset max inline. */
1850 1, /* Issue rate. */
1851 ARM_PREFETCH_NOT_BENEFICIAL,
1852 tune_params::PREF_CONST_POOL_FALSE,
1853 tune_params::PREF_LDRD_FALSE,
1854 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1855 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1856 tune_params::DISPARAGE_FLAGS_NEITHER,
1857 tune_params::PREF_NEON_64_FALSE,
1858 tune_params::PREF_NEON_STRINGOPS_FALSE,
1859 tune_params::FUSE_NOTHING,
1860 tune_params::SCHED_AUTOPREF_OFF
1864 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1865 const struct tune_params arm_cortex_tune =
1867 arm_9e_rtx_costs,
1868 &generic_extra_costs,
1869 NULL, /* Sched adj cost. */
1870 arm_default_branch_cost,
1871 &arm_default_vec_cost,
1872 1, /* Constant limit. */
1873 5, /* Max cond insns. */
1874 8, /* Memset max inline. */
1875 2, /* Issue rate. */
1876 ARM_PREFETCH_NOT_BENEFICIAL,
1877 tune_params::PREF_CONST_POOL_FALSE,
1878 tune_params::PREF_LDRD_FALSE,
1879 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1880 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1881 tune_params::DISPARAGE_FLAGS_NEITHER,
1882 tune_params::PREF_NEON_64_FALSE,
1883 tune_params::PREF_NEON_STRINGOPS_FALSE,
1884 tune_params::FUSE_NOTHING,
1885 tune_params::SCHED_AUTOPREF_OFF
1888 const struct tune_params arm_cortex_a8_tune =
1890 arm_9e_rtx_costs,
1891 &cortexa8_extra_costs,
1892 NULL, /* Sched adj cost. */
1893 arm_default_branch_cost,
1894 &arm_default_vec_cost,
1895 1, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 2, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL,
1900 tune_params::PREF_CONST_POOL_FALSE,
1901 tune_params::PREF_LDRD_FALSE,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER,
1905 tune_params::PREF_NEON_64_FALSE,
1906 tune_params::PREF_NEON_STRINGOPS_TRUE,
1907 tune_params::FUSE_NOTHING,
1908 tune_params::SCHED_AUTOPREF_OFF
1911 const struct tune_params arm_cortex_a7_tune =
1913 arm_9e_rtx_costs,
1914 &cortexa7_extra_costs,
1915 NULL, /* Sched adj cost. */
1916 arm_default_branch_cost,
1917 &arm_default_vec_cost,
1918 1, /* Constant limit. */
1919 5, /* Max cond insns. */
1920 8, /* Memset max inline. */
1921 2, /* Issue rate. */
1922 ARM_PREFETCH_NOT_BENEFICIAL,
1923 tune_params::PREF_CONST_POOL_FALSE,
1924 tune_params::PREF_LDRD_FALSE,
1925 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1926 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1927 tune_params::DISPARAGE_FLAGS_NEITHER,
1928 tune_params::PREF_NEON_64_FALSE,
1929 tune_params::PREF_NEON_STRINGOPS_TRUE,
1930 tune_params::FUSE_NOTHING,
1931 tune_params::SCHED_AUTOPREF_OFF
1934 const struct tune_params arm_cortex_a15_tune =
1936 arm_9e_rtx_costs,
1937 &cortexa15_extra_costs,
1938 NULL, /* Sched adj cost. */
1939 arm_default_branch_cost,
1940 &arm_default_vec_cost,
1941 1, /* Constant limit. */
1942 2, /* Max cond insns. */
1943 8, /* Memset max inline. */
1944 3, /* Issue rate. */
1945 ARM_PREFETCH_NOT_BENEFICIAL,
1946 tune_params::PREF_CONST_POOL_FALSE,
1947 tune_params::PREF_LDRD_TRUE,
1948 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1950 tune_params::DISPARAGE_FLAGS_ALL,
1951 tune_params::PREF_NEON_64_FALSE,
1952 tune_params::PREF_NEON_STRINGOPS_TRUE,
1953 tune_params::FUSE_NOTHING,
1954 tune_params::SCHED_AUTOPREF_FULL
1957 const struct tune_params arm_cortex_a35_tune =
1959 arm_9e_rtx_costs,
1960 &cortexa53_extra_costs,
1961 NULL, /* Sched adj cost. */
1962 arm_default_branch_cost,
1963 &arm_default_vec_cost,
1964 1, /* Constant limit. */
1965 5, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 1, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 tune_params::PREF_CONST_POOL_FALSE,
1970 tune_params::PREF_LDRD_FALSE,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER,
1974 tune_params::PREF_NEON_64_FALSE,
1975 tune_params::PREF_NEON_STRINGOPS_TRUE,
1976 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1977 tune_params::SCHED_AUTOPREF_OFF
1980 const struct tune_params arm_cortex_a53_tune =
1982 arm_9e_rtx_costs,
1983 &cortexa53_extra_costs,
1984 NULL, /* Sched adj cost. */
1985 arm_default_branch_cost,
1986 &arm_default_vec_cost,
1987 1, /* Constant limit. */
1988 5, /* Max cond insns. */
1989 8, /* Memset max inline. */
1990 2, /* Issue rate. */
1991 ARM_PREFETCH_NOT_BENEFICIAL,
1992 tune_params::PREF_CONST_POOL_FALSE,
1993 tune_params::PREF_LDRD_FALSE,
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1996 tune_params::DISPARAGE_FLAGS_NEITHER,
1997 tune_params::PREF_NEON_64_FALSE,
1998 tune_params::PREF_NEON_STRINGOPS_TRUE,
1999 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2000 tune_params::SCHED_AUTOPREF_OFF
2003 const struct tune_params arm_cortex_a57_tune =
2005 arm_9e_rtx_costs,
2006 &cortexa57_extra_costs,
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 2, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 3, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_TRUE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_ALL,
2020 tune_params::PREF_NEON_64_FALSE,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE,
2022 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2023 tune_params::SCHED_AUTOPREF_FULL
2026 const struct tune_params arm_exynosm1_tune =
2028 arm_9e_rtx_costs,
2029 &exynosm1_extra_costs,
2030 NULL, /* Sched adj cost. */
2031 arm_default_branch_cost,
2032 &arm_default_vec_cost,
2033 1, /* Constant limit. */
2034 2, /* Max cond insns. */
2035 8, /* Memset max inline. */
2036 3, /* Issue rate. */
2037 ARM_PREFETCH_NOT_BENEFICIAL,
2038 tune_params::PREF_CONST_POOL_FALSE,
2039 tune_params::PREF_LDRD_TRUE,
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2042 tune_params::DISPARAGE_FLAGS_ALL,
2043 tune_params::PREF_NEON_64_FALSE,
2044 tune_params::PREF_NEON_STRINGOPS_TRUE,
2045 tune_params::FUSE_NOTHING,
2046 tune_params::SCHED_AUTOPREF_OFF
2049 const struct tune_params arm_xgene1_tune =
2051 arm_9e_rtx_costs,
2052 &xgene1_extra_costs,
2053 NULL, /* Sched adj cost. */
2054 arm_default_branch_cost,
2055 &arm_default_vec_cost,
2056 1, /* Constant limit. */
2057 2, /* Max cond insns. */
2058 32, /* Memset max inline. */
2059 4, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL,
2061 tune_params::PREF_CONST_POOL_FALSE,
2062 tune_params::PREF_LDRD_TRUE,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_ALL,
2066 tune_params::PREF_NEON_64_FALSE,
2067 tune_params::PREF_NEON_STRINGOPS_FALSE,
2068 tune_params::FUSE_NOTHING,
2069 tune_params::SCHED_AUTOPREF_OFF
2072 const struct tune_params arm_qdf24xx_tune =
2074 arm_9e_rtx_costs,
2075 &qdf24xx_extra_costs,
2076 NULL, /* Scheduler cost adjustment. */
2077 arm_default_branch_cost,
2078 &arm_default_vec_cost, /* Vectorizer costs. */
2079 1, /* Constant limit. */
2080 2, /* Max cond insns. */
2081 8, /* Memset max inline. */
2082 4, /* Issue rate. */
2083 ARM_PREFETCH_BENEFICIAL (0, -1, 64),
2084 tune_params::PREF_CONST_POOL_FALSE,
2085 tune_params::PREF_LDRD_TRUE,
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2087 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2088 tune_params::DISPARAGE_FLAGS_ALL,
2089 tune_params::PREF_NEON_64_FALSE,
2090 tune_params::PREF_NEON_STRINGOPS_TRUE,
2091 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2092 tune_params::SCHED_AUTOPREF_FULL
2095 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2096 less appealing. Set max_insns_skipped to a low value. */
2098 const struct tune_params arm_cortex_a5_tune =
2100 arm_9e_rtx_costs,
2101 &cortexa5_extra_costs,
2102 NULL, /* Sched adj cost. */
2103 arm_cortex_a5_branch_cost,
2104 &arm_default_vec_cost,
2105 1, /* Constant limit. */
2106 1, /* Max cond insns. */
2107 8, /* Memset max inline. */
2108 2, /* Issue rate. */
2109 ARM_PREFETCH_NOT_BENEFICIAL,
2110 tune_params::PREF_CONST_POOL_FALSE,
2111 tune_params::PREF_LDRD_FALSE,
2112 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2113 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2114 tune_params::DISPARAGE_FLAGS_NEITHER,
2115 tune_params::PREF_NEON_64_FALSE,
2116 tune_params::PREF_NEON_STRINGOPS_TRUE,
2117 tune_params::FUSE_NOTHING,
2118 tune_params::SCHED_AUTOPREF_OFF
2121 const struct tune_params arm_cortex_a9_tune =
2123 arm_9e_rtx_costs,
2124 &cortexa9_extra_costs,
2125 cortex_a9_sched_adjust_cost,
2126 arm_default_branch_cost,
2127 &arm_default_vec_cost,
2128 1, /* Constant limit. */
2129 5, /* Max cond insns. */
2130 8, /* Memset max inline. */
2131 2, /* Issue rate. */
2132 ARM_PREFETCH_BENEFICIAL(4,32,32),
2133 tune_params::PREF_CONST_POOL_FALSE,
2134 tune_params::PREF_LDRD_FALSE,
2135 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2137 tune_params::DISPARAGE_FLAGS_NEITHER,
2138 tune_params::PREF_NEON_64_FALSE,
2139 tune_params::PREF_NEON_STRINGOPS_FALSE,
2140 tune_params::FUSE_NOTHING,
2141 tune_params::SCHED_AUTOPREF_OFF
2144 const struct tune_params arm_cortex_a12_tune =
2146 arm_9e_rtx_costs,
2147 &cortexa12_extra_costs,
2148 NULL, /* Sched adj cost. */
2149 arm_default_branch_cost,
2150 &arm_default_vec_cost, /* Vectorizer costs. */
2151 1, /* Constant limit. */
2152 2, /* Max cond insns. */
2153 8, /* Memset max inline. */
2154 2, /* Issue rate. */
2155 ARM_PREFETCH_NOT_BENEFICIAL,
2156 tune_params::PREF_CONST_POOL_FALSE,
2157 tune_params::PREF_LDRD_TRUE,
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2160 tune_params::DISPARAGE_FLAGS_ALL,
2161 tune_params::PREF_NEON_64_FALSE,
2162 tune_params::PREF_NEON_STRINGOPS_TRUE,
2163 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2164 tune_params::SCHED_AUTOPREF_OFF
2167 const struct tune_params arm_cortex_a73_tune =
2169 arm_9e_rtx_costs,
2170 &cortexa57_extra_costs,
2171 NULL, /* Sched adj cost. */
2172 arm_default_branch_cost,
2173 &arm_default_vec_cost, /* Vectorizer costs. */
2174 1, /* Constant limit. */
2175 2, /* Max cond insns. */
2176 8, /* Memset max inline. */
2177 2, /* Issue rate. */
2178 ARM_PREFETCH_NOT_BENEFICIAL,
2179 tune_params::PREF_CONST_POOL_FALSE,
2180 tune_params::PREF_LDRD_TRUE,
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2182 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2183 tune_params::DISPARAGE_FLAGS_ALL,
2184 tune_params::PREF_NEON_64_FALSE,
2185 tune_params::PREF_NEON_STRINGOPS_TRUE,
2186 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2187 tune_params::SCHED_AUTOPREF_FULL
2190 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2191 cycle to execute each. An LDR from the constant pool also takes two cycles
2192 to execute, but mildly increases pipelining opportunity (consecutive
2193 loads/stores can be pipelined together, saving one cycle), and may also
2194 improve icache utilisation. Hence we prefer the constant pool for such
2195 processors. */
2197 const struct tune_params arm_v7m_tune =
2199 arm_9e_rtx_costs,
2200 &v7m_extra_costs,
2201 NULL, /* Sched adj cost. */
2202 arm_cortex_m_branch_cost,
2203 &arm_default_vec_cost,
2204 1, /* Constant limit. */
2205 2, /* Max cond insns. */
2206 8, /* Memset max inline. */
2207 1, /* Issue rate. */
2208 ARM_PREFETCH_NOT_BENEFICIAL,
2209 tune_params::PREF_CONST_POOL_TRUE,
2210 tune_params::PREF_LDRD_FALSE,
2211 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2212 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2213 tune_params::DISPARAGE_FLAGS_NEITHER,
2214 tune_params::PREF_NEON_64_FALSE,
2215 tune_params::PREF_NEON_STRINGOPS_FALSE,
2216 tune_params::FUSE_NOTHING,
2217 tune_params::SCHED_AUTOPREF_OFF
2220 /* Cortex-M7 tuning. */
2222 const struct tune_params arm_cortex_m7_tune =
2224 arm_9e_rtx_costs,
2225 &v7m_extra_costs,
2226 NULL, /* Sched adj cost. */
2227 arm_cortex_m7_branch_cost,
2228 &arm_default_vec_cost,
2229 0, /* Constant limit. */
2230 1, /* Max cond insns. */
2231 8, /* Memset max inline. */
2232 2, /* Issue rate. */
2233 ARM_PREFETCH_NOT_BENEFICIAL,
2234 tune_params::PREF_CONST_POOL_TRUE,
2235 tune_params::PREF_LDRD_FALSE,
2236 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2237 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2238 tune_params::DISPARAGE_FLAGS_NEITHER,
2239 tune_params::PREF_NEON_64_FALSE,
2240 tune_params::PREF_NEON_STRINGOPS_FALSE,
2241 tune_params::FUSE_NOTHING,
2242 tune_params::SCHED_AUTOPREF_OFF
2245 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2246 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2247 const struct tune_params arm_v6m_tune =
2249 arm_9e_rtx_costs,
2250 NULL, /* Insn extra costs. */
2251 NULL, /* Sched adj cost. */
2252 arm_default_branch_cost,
2253 &arm_default_vec_cost, /* Vectorizer costs. */
2254 1, /* Constant limit. */
2255 5, /* Max cond insns. */
2256 8, /* Memset max inline. */
2257 1, /* Issue rate. */
2258 ARM_PREFETCH_NOT_BENEFICIAL,
2259 tune_params::PREF_CONST_POOL_FALSE,
2260 tune_params::PREF_LDRD_FALSE,
2261 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2262 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2263 tune_params::DISPARAGE_FLAGS_NEITHER,
2264 tune_params::PREF_NEON_64_FALSE,
2265 tune_params::PREF_NEON_STRINGOPS_FALSE,
2266 tune_params::FUSE_NOTHING,
2267 tune_params::SCHED_AUTOPREF_OFF
2270 const struct tune_params arm_fa726te_tune =
2272 arm_9e_rtx_costs,
2273 NULL, /* Insn extra costs. */
2274 fa726te_sched_adjust_cost,
2275 arm_default_branch_cost,
2276 &arm_default_vec_cost,
2277 1, /* Constant limit. */
2278 5, /* Max cond insns. */
2279 8, /* Memset max inline. */
2280 2, /* Issue rate. */
2281 ARM_PREFETCH_NOT_BENEFICIAL,
2282 tune_params::PREF_CONST_POOL_TRUE,
2283 tune_params::PREF_LDRD_FALSE,
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2285 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2286 tune_params::DISPARAGE_FLAGS_NEITHER,
2287 tune_params::PREF_NEON_64_FALSE,
2288 tune_params::PREF_NEON_STRINGOPS_FALSE,
2289 tune_params::FUSE_NOTHING,
2290 tune_params::SCHED_AUTOPREF_OFF
2294 /* Not all of these give usefully different compilation alternatives,
2295 but there is no simple way of generalizing them. */
2296 static const struct processors all_cores[] =
2298 /* ARM Cores */
2299 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2300 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2301 FLAGS, &arm_##COSTS##_tune},
2302 #include "arm-cores.def"
2303 #undef ARM_CORE
2304 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2307 static const struct processors all_architectures[] =
2309 /* ARM Architectures */
2310 /* We don't specify tuning costs here as it will be figured out
2311 from the core. */
2313 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2314 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2315 #include "arm-arches.def"
2316 #undef ARM_ARCH
2317 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2321 /* These are populated as commandline arguments are processed, or NULL
2322 if not specified. */
2323 static const struct processors *arm_selected_arch;
2324 static const struct processors *arm_selected_cpu;
2325 static const struct processors *arm_selected_tune;
2327 /* The name of the preprocessor macro to define for this architecture. PROFILE
2328 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2329 is thus chosen to be big enough to hold the longest architecture name. */
2331 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2333 /* Available values for -mfpu=. */
2335 const struct arm_fpu_desc all_fpus[] =
2337 #define ARM_FPU(NAME, REV, VFP_REGS, FEATURES) \
2338 { NAME, REV, VFP_REGS, FEATURES },
2339 #include "arm-fpus.def"
2340 #undef ARM_FPU
2343 /* Supported TLS relocations. */
2345 enum tls_reloc {
2346 TLS_GD32,
2347 TLS_LDM32,
2348 TLS_LDO32,
2349 TLS_IE32,
2350 TLS_LE32,
2351 TLS_DESCSEQ /* GNU scheme */
2354 /* The maximum number of insns to be used when loading a constant. */
2355 inline static int
2356 arm_constant_limit (bool size_p)
2358 return size_p ? 1 : current_tune->constant_limit;
2361 /* Emit an insn that's a simple single-set. Both the operands must be known
2362 to be valid. */
2363 inline static rtx_insn *
2364 emit_set_insn (rtx x, rtx y)
2366 return emit_insn (gen_rtx_SET (x, y));
2369 /* Return the number of bits set in VALUE. */
2370 static unsigned
2371 bit_count (unsigned long value)
2373 unsigned long count = 0;
2375 while (value)
2377 count++;
2378 value &= value - 1; /* Clear the least-significant set bit. */
2381 return count;
2384 /* Return the number of features in feature-set SET. */
2385 static unsigned
2386 feature_count (const arm_feature_set * set)
2388 return (bit_count (ARM_FSET_CPU1 (*set))
2389 + bit_count (ARM_FSET_CPU2 (*set)));
2392 typedef struct
2394 machine_mode mode;
2395 const char *name;
2396 } arm_fixed_mode_set;
2398 /* A small helper for setting fixed-point library libfuncs. */
2400 static void
2401 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2402 const char *funcname, const char *modename,
2403 int num_suffix)
2405 char buffer[50];
2407 if (num_suffix == 0)
2408 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2409 else
2410 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2412 set_optab_libfunc (optable, mode, buffer);
2415 static void
2416 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2417 machine_mode from, const char *funcname,
2418 const char *toname, const char *fromname)
2420 char buffer[50];
2421 const char *maybe_suffix_2 = "";
2423 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2424 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2425 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2426 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2427 maybe_suffix_2 = "2";
2429 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2430 maybe_suffix_2);
2432 set_conv_libfunc (optable, to, from, buffer);
2435 /* Set up library functions unique to ARM. */
2437 static void
2438 arm_init_libfuncs (void)
2440 /* For Linux, we have access to kernel support for atomic operations. */
2441 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2442 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2444 /* There are no special library functions unless we are using the
2445 ARM BPABI. */
2446 if (!TARGET_BPABI)
2447 return;
2449 /* The functions below are described in Section 4 of the "Run-Time
2450 ABI for the ARM architecture", Version 1.0. */
2452 /* Double-precision floating-point arithmetic. Table 2. */
2453 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2454 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2455 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2456 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2457 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2459 /* Double-precision comparisons. Table 3. */
2460 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2461 set_optab_libfunc (ne_optab, DFmode, NULL);
2462 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2463 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2464 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2465 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2466 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2468 /* Single-precision floating-point arithmetic. Table 4. */
2469 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2470 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2471 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2472 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2473 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2475 /* Single-precision comparisons. Table 5. */
2476 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2477 set_optab_libfunc (ne_optab, SFmode, NULL);
2478 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2479 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2480 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2481 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2482 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2484 /* Floating-point to integer conversions. Table 6. */
2485 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2486 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2487 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2488 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2489 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2490 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2491 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2492 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2494 /* Conversions between floating types. Table 7. */
2495 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2496 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2498 /* Integer to floating-point conversions. Table 8. */
2499 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2500 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2501 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2502 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2503 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2504 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2505 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2506 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2508 /* Long long. Table 9. */
2509 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2510 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2511 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2512 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2513 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2514 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2515 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2516 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2518 /* Integer (32/32->32) division. \S 4.3.1. */
2519 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2520 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2522 /* The divmod functions are designed so that they can be used for
2523 plain division, even though they return both the quotient and the
2524 remainder. The quotient is returned in the usual location (i.e.,
2525 r0 for SImode, {r0, r1} for DImode), just as would be expected
2526 for an ordinary division routine. Because the AAPCS calling
2527 conventions specify that all of { r0, r1, r2, r3 } are
2528 callee-saved registers, there is no need to tell the compiler
2529 explicitly that those registers are clobbered by these
2530 routines. */
2531 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2532 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2534 /* For SImode division the ABI provides div-without-mod routines,
2535 which are faster. */
2536 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2537 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2539 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2540 divmod libcalls instead. */
2541 set_optab_libfunc (smod_optab, DImode, NULL);
2542 set_optab_libfunc (umod_optab, DImode, NULL);
2543 set_optab_libfunc (smod_optab, SImode, NULL);
2544 set_optab_libfunc (umod_optab, SImode, NULL);
2546 /* Half-precision float operations. The compiler handles all operations
2547 with NULL libfuncs by converting the SFmode. */
2548 switch (arm_fp16_format)
2550 case ARM_FP16_FORMAT_IEEE:
2551 case ARM_FP16_FORMAT_ALTERNATIVE:
2553 /* Conversions. */
2554 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2555 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2556 ? "__gnu_f2h_ieee"
2557 : "__gnu_f2h_alternative"));
2558 set_conv_libfunc (sext_optab, SFmode, HFmode,
2559 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2560 ? "__gnu_h2f_ieee"
2561 : "__gnu_h2f_alternative"));
2563 /* Arithmetic. */
2564 set_optab_libfunc (add_optab, HFmode, NULL);
2565 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2566 set_optab_libfunc (smul_optab, HFmode, NULL);
2567 set_optab_libfunc (neg_optab, HFmode, NULL);
2568 set_optab_libfunc (sub_optab, HFmode, NULL);
2570 /* Comparisons. */
2571 set_optab_libfunc (eq_optab, HFmode, NULL);
2572 set_optab_libfunc (ne_optab, HFmode, NULL);
2573 set_optab_libfunc (lt_optab, HFmode, NULL);
2574 set_optab_libfunc (le_optab, HFmode, NULL);
2575 set_optab_libfunc (ge_optab, HFmode, NULL);
2576 set_optab_libfunc (gt_optab, HFmode, NULL);
2577 set_optab_libfunc (unord_optab, HFmode, NULL);
2578 break;
2580 default:
2581 break;
2584 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2586 const arm_fixed_mode_set fixed_arith_modes[] =
2588 { QQmode, "qq" },
2589 { UQQmode, "uqq" },
2590 { HQmode, "hq" },
2591 { UHQmode, "uhq" },
2592 { SQmode, "sq" },
2593 { USQmode, "usq" },
2594 { DQmode, "dq" },
2595 { UDQmode, "udq" },
2596 { TQmode, "tq" },
2597 { UTQmode, "utq" },
2598 { HAmode, "ha" },
2599 { UHAmode, "uha" },
2600 { SAmode, "sa" },
2601 { USAmode, "usa" },
2602 { DAmode, "da" },
2603 { UDAmode, "uda" },
2604 { TAmode, "ta" },
2605 { UTAmode, "uta" }
2607 const arm_fixed_mode_set fixed_conv_modes[] =
2609 { QQmode, "qq" },
2610 { UQQmode, "uqq" },
2611 { HQmode, "hq" },
2612 { UHQmode, "uhq" },
2613 { SQmode, "sq" },
2614 { USQmode, "usq" },
2615 { DQmode, "dq" },
2616 { UDQmode, "udq" },
2617 { TQmode, "tq" },
2618 { UTQmode, "utq" },
2619 { HAmode, "ha" },
2620 { UHAmode, "uha" },
2621 { SAmode, "sa" },
2622 { USAmode, "usa" },
2623 { DAmode, "da" },
2624 { UDAmode, "uda" },
2625 { TAmode, "ta" },
2626 { UTAmode, "uta" },
2627 { QImode, "qi" },
2628 { HImode, "hi" },
2629 { SImode, "si" },
2630 { DImode, "di" },
2631 { TImode, "ti" },
2632 { SFmode, "sf" },
2633 { DFmode, "df" }
2635 unsigned int i, j;
2637 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2639 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2640 "add", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2642 "ssadd", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2644 "usadd", fixed_arith_modes[i].name, 3);
2645 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2646 "sub", fixed_arith_modes[i].name, 3);
2647 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2648 "sssub", fixed_arith_modes[i].name, 3);
2649 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2650 "ussub", fixed_arith_modes[i].name, 3);
2651 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2652 "mul", fixed_arith_modes[i].name, 3);
2653 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2654 "ssmul", fixed_arith_modes[i].name, 3);
2655 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2656 "usmul", fixed_arith_modes[i].name, 3);
2657 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2658 "div", fixed_arith_modes[i].name, 3);
2659 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2660 "udiv", fixed_arith_modes[i].name, 3);
2661 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2662 "ssdiv", fixed_arith_modes[i].name, 3);
2663 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2664 "usdiv", fixed_arith_modes[i].name, 3);
2665 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2666 "neg", fixed_arith_modes[i].name, 2);
2667 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2668 "ssneg", fixed_arith_modes[i].name, 2);
2669 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2670 "usneg", fixed_arith_modes[i].name, 2);
2671 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2672 "ashl", fixed_arith_modes[i].name, 3);
2673 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2674 "ashr", fixed_arith_modes[i].name, 3);
2675 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2676 "lshr", fixed_arith_modes[i].name, 3);
2677 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2678 "ssashl", fixed_arith_modes[i].name, 3);
2679 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2680 "usashl", fixed_arith_modes[i].name, 3);
2681 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2682 "cmp", fixed_arith_modes[i].name, 2);
2685 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2686 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2688 if (i == j
2689 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2690 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2691 continue;
2693 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2694 fixed_conv_modes[j].mode, "fract",
2695 fixed_conv_modes[i].name,
2696 fixed_conv_modes[j].name);
2697 arm_set_fixed_conv_libfunc (satfract_optab,
2698 fixed_conv_modes[i].mode,
2699 fixed_conv_modes[j].mode, "satfract",
2700 fixed_conv_modes[i].name,
2701 fixed_conv_modes[j].name);
2702 arm_set_fixed_conv_libfunc (fractuns_optab,
2703 fixed_conv_modes[i].mode,
2704 fixed_conv_modes[j].mode, "fractuns",
2705 fixed_conv_modes[i].name,
2706 fixed_conv_modes[j].name);
2707 arm_set_fixed_conv_libfunc (satfractuns_optab,
2708 fixed_conv_modes[i].mode,
2709 fixed_conv_modes[j].mode, "satfractuns",
2710 fixed_conv_modes[i].name,
2711 fixed_conv_modes[j].name);
2715 if (TARGET_AAPCS_BASED)
2716 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2719 /* On AAPCS systems, this is the "struct __va_list". */
2720 static GTY(()) tree va_list_type;
2722 /* Return the type to use as __builtin_va_list. */
2723 static tree
2724 arm_build_builtin_va_list (void)
2726 tree va_list_name;
2727 tree ap_field;
2729 if (!TARGET_AAPCS_BASED)
2730 return std_build_builtin_va_list ();
2732 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2733 defined as:
2735 struct __va_list
2737 void *__ap;
2740 The C Library ABI further reinforces this definition in \S
2741 4.1.
2743 We must follow this definition exactly. The structure tag
2744 name is visible in C++ mangled names, and thus forms a part
2745 of the ABI. The field name may be used by people who
2746 #include <stdarg.h>. */
2747 /* Create the type. */
2748 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2749 /* Give it the required name. */
2750 va_list_name = build_decl (BUILTINS_LOCATION,
2751 TYPE_DECL,
2752 get_identifier ("__va_list"),
2753 va_list_type);
2754 DECL_ARTIFICIAL (va_list_name) = 1;
2755 TYPE_NAME (va_list_type) = va_list_name;
2756 TYPE_STUB_DECL (va_list_type) = va_list_name;
2757 /* Create the __ap field. */
2758 ap_field = build_decl (BUILTINS_LOCATION,
2759 FIELD_DECL,
2760 get_identifier ("__ap"),
2761 ptr_type_node);
2762 DECL_ARTIFICIAL (ap_field) = 1;
2763 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2764 TYPE_FIELDS (va_list_type) = ap_field;
2765 /* Compute its layout. */
2766 layout_type (va_list_type);
2768 return va_list_type;
2771 /* Return an expression of type "void *" pointing to the next
2772 available argument in a variable-argument list. VALIST is the
2773 user-level va_list object, of type __builtin_va_list. */
2774 static tree
2775 arm_extract_valist_ptr (tree valist)
2777 if (TREE_TYPE (valist) == error_mark_node)
2778 return error_mark_node;
2780 /* On an AAPCS target, the pointer is stored within "struct
2781 va_list". */
2782 if (TARGET_AAPCS_BASED)
2784 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2785 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2786 valist, ap_field, NULL_TREE);
2789 return valist;
2792 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2793 static void
2794 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2796 valist = arm_extract_valist_ptr (valist);
2797 std_expand_builtin_va_start (valist, nextarg);
2800 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2801 static tree
2802 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2803 gimple_seq *post_p)
2805 valist = arm_extract_valist_ptr (valist);
2806 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2809 /* Check any incompatible options that the user has specified. */
2810 static void
2811 arm_option_check_internal (struct gcc_options *opts)
2813 int flags = opts->x_target_flags;
2814 const struct arm_fpu_desc *fpu_desc = &all_fpus[opts->x_arm_fpu_index];
2816 /* iWMMXt and NEON are incompatible. */
2817 if (TARGET_IWMMXT
2818 && ARM_FPU_FSET_HAS (fpu_desc->features, FPU_FL_NEON))
2819 error ("iWMMXt and NEON are incompatible");
2821 /* Make sure that the processor choice does not conflict with any of the
2822 other command line choices. */
2823 if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM))
2824 error ("target CPU does not support ARM mode");
2826 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2827 from here where no function is being compiled currently. */
2828 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2829 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2831 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2832 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2834 /* If this target is normally configured to use APCS frames, warn if they
2835 are turned off and debugging is turned on. */
2836 if (TARGET_ARM_P (flags)
2837 && write_symbols != NO_DEBUG
2838 && !TARGET_APCS_FRAME
2839 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2840 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2842 /* iWMMXt unsupported under Thumb mode. */
2843 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2844 error ("iWMMXt unsupported under Thumb mode");
2846 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2847 error ("can not use -mtp=cp15 with 16-bit Thumb");
2849 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2851 error ("RTP PIC is incompatible with Thumb");
2852 flag_pic = 0;
2855 /* We only support -mslow-flash-data on armv7-m targets. */
2856 if (target_slow_flash_data
2857 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2858 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2859 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2861 /* We only support pure-code on Thumb-2 M-profile targets. */
2862 if (target_pure_code
2863 && (!arm_arch_thumb2 || arm_arch_notm || flag_pic || TARGET_NEON))
2864 error ("-mpure-code only supports non-pic code on armv7-m targets");
2868 /* Recompute the global settings depending on target attribute options. */
2870 static void
2871 arm_option_params_internal (void)
2873 /* If we are not using the default (ARM mode) section anchor offset
2874 ranges, then set the correct ranges now. */
2875 if (TARGET_THUMB1)
2877 /* Thumb-1 LDR instructions cannot have negative offsets.
2878 Permissible positive offset ranges are 5-bit (for byte loads),
2879 6-bit (for halfword loads), or 7-bit (for word loads).
2880 Empirical results suggest a 7-bit anchor range gives the best
2881 overall code size. */
2882 targetm.min_anchor_offset = 0;
2883 targetm.max_anchor_offset = 127;
2885 else if (TARGET_THUMB2)
2887 /* The minimum is set such that the total size of the block
2888 for a particular anchor is 248 + 1 + 4095 bytes, which is
2889 divisible by eight, ensuring natural spacing of anchors. */
2890 targetm.min_anchor_offset = -248;
2891 targetm.max_anchor_offset = 4095;
2893 else
2895 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2896 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2899 if (optimize_size)
2901 /* If optimizing for size, bump the number of instructions that we
2902 are prepared to conditionally execute (even on a StrongARM). */
2903 max_insns_skipped = 6;
2905 /* For THUMB2, we limit the conditional sequence to one IT block. */
2906 if (TARGET_THUMB2)
2907 max_insns_skipped = arm_restrict_it ? 1 : 4;
2909 else
2910 /* When -mrestrict-it is in use tone down the if-conversion. */
2911 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2912 ? 1 : current_tune->max_insns_skipped;
2915 /* True if -mflip-thumb should next add an attribute for the default
2916 mode, false if it should next add an attribute for the opposite mode. */
2917 static GTY(()) bool thumb_flipper;
2919 /* Options after initial target override. */
2920 static GTY(()) tree init_optimize;
2922 static void
2923 arm_override_options_after_change_1 (struct gcc_options *opts)
2925 if (opts->x_align_functions <= 0)
2926 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2927 && opts->x_optimize_size ? 2 : 4;
2930 /* Implement targetm.override_options_after_change. */
2932 static void
2933 arm_override_options_after_change (void)
2935 arm_override_options_after_change_1 (&global_options);
2938 /* Reset options between modes that the user has specified. */
2939 static void
2940 arm_option_override_internal (struct gcc_options *opts,
2941 struct gcc_options *opts_set)
2943 arm_override_options_after_change_1 (opts);
2945 if (TARGET_INTERWORK && !ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB))
2947 /* The default is to enable interworking, so this warning message would
2948 be confusing to users who have just compiled with, eg, -march=armv3. */
2949 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2950 opts->x_target_flags &= ~MASK_INTERWORK;
2953 if (TARGET_THUMB_P (opts->x_target_flags)
2954 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)))
2956 warning (0, "target CPU does not support THUMB instructions");
2957 opts->x_target_flags &= ~MASK_THUMB;
2960 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2962 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2963 opts->x_target_flags &= ~MASK_APCS_FRAME;
2966 /* Callee super interworking implies thumb interworking. Adding
2967 this to the flags here simplifies the logic elsewhere. */
2968 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2969 opts->x_target_flags |= MASK_INTERWORK;
2971 /* need to remember initial values so combinaisons of options like
2972 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2973 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2975 if (! opts_set->x_arm_restrict_it)
2976 opts->x_arm_restrict_it = arm_arch8;
2978 /* ARM execution state and M profile don't have [restrict] IT. */
2979 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2980 opts->x_arm_restrict_it = 0;
2982 /* Enable -munaligned-access by default for
2983 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2984 i.e. Thumb2 and ARM state only.
2985 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2986 - ARMv8 architecture-base processors.
2988 Disable -munaligned-access by default for
2989 - all pre-ARMv6 architecture-based processors
2990 - ARMv6-M architecture-based processors
2991 - ARMv8-M Baseline processors. */
2993 if (! opts_set->x_unaligned_access)
2995 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2996 && arm_arch6 && (arm_arch_notm || arm_arch7));
2998 else if (opts->x_unaligned_access == 1
2999 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3001 warning (0, "target CPU does not support unaligned accesses");
3002 opts->x_unaligned_access = 0;
3005 /* Don't warn since it's on by default in -O2. */
3006 if (TARGET_THUMB1_P (opts->x_target_flags))
3007 opts->x_flag_schedule_insns = 0;
3008 else
3009 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3011 /* Disable shrink-wrap when optimizing function for size, since it tends to
3012 generate additional returns. */
3013 if (optimize_function_for_size_p (cfun)
3014 && TARGET_THUMB2_P (opts->x_target_flags))
3015 opts->x_flag_shrink_wrap = false;
3016 else
3017 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3019 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3020 - epilogue_insns - does not accurately model the corresponding insns
3021 emitted in the asm file. In particular, see the comment in thumb_exit
3022 'Find out how many of the (return) argument registers we can corrupt'.
3023 As a consequence, the epilogue may clobber registers without fipa-ra
3024 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3025 TODO: Accurately model clobbers for epilogue_insns and reenable
3026 fipa-ra. */
3027 if (TARGET_THUMB1_P (opts->x_target_flags))
3028 opts->x_flag_ipa_ra = 0;
3029 else
3030 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3032 /* Thumb2 inline assembly code should always use unified syntax.
3033 This will apply to ARM and Thumb1 eventually. */
3034 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3036 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3037 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3038 #endif
3041 /* Fix up any incompatible options that the user has specified. */
3042 static void
3043 arm_option_override (void)
3045 arm_selected_arch = NULL;
3046 arm_selected_cpu = NULL;
3047 arm_selected_tune = NULL;
3049 if (global_options_set.x_arm_arch_option)
3050 arm_selected_arch = &all_architectures[arm_arch_option];
3052 if (global_options_set.x_arm_cpu_option)
3054 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
3055 arm_selected_tune = &all_cores[(int) arm_cpu_option];
3058 if (global_options_set.x_arm_tune_option)
3059 arm_selected_tune = &all_cores[(int) arm_tune_option];
3061 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3062 SUBTARGET_OVERRIDE_OPTIONS;
3063 #endif
3065 if (arm_selected_arch)
3067 if (arm_selected_cpu)
3069 const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE);
3070 arm_feature_set selected_flags;
3071 ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags,
3072 arm_selected_arch->flags);
3073 ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags);
3074 /* Check for conflict between mcpu and march. */
3075 if (!ARM_FSET_IS_EMPTY (selected_flags))
3077 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3078 arm_selected_cpu->name, arm_selected_arch->name);
3079 /* -march wins for code generation.
3080 -mcpu wins for default tuning. */
3081 if (!arm_selected_tune)
3082 arm_selected_tune = arm_selected_cpu;
3084 arm_selected_cpu = arm_selected_arch;
3086 else
3087 /* -mcpu wins. */
3088 arm_selected_arch = NULL;
3090 else
3091 /* Pick a CPU based on the architecture. */
3092 arm_selected_cpu = arm_selected_arch;
3095 /* If the user did not specify a processor, choose one for them. */
3096 if (!arm_selected_cpu)
3098 const struct processors * sel;
3099 arm_feature_set sought = ARM_FSET_EMPTY;;
3101 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
3102 if (!arm_selected_cpu->name)
3104 #ifdef SUBTARGET_CPU_DEFAULT
3105 /* Use the subtarget default CPU if none was specified by
3106 configure. */
3107 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
3108 #endif
3109 /* Default to ARM6. */
3110 if (!arm_selected_cpu->name)
3111 arm_selected_cpu = &all_cores[arm6];
3114 sel = arm_selected_cpu;
3115 insn_flags = sel->flags;
3117 /* Now check to see if the user has specified some command line
3118 switch that require certain abilities from the cpu. */
3120 if (TARGET_INTERWORK || TARGET_THUMB)
3122 ARM_FSET_ADD_CPU1 (sought, FL_THUMB);
3123 ARM_FSET_ADD_CPU1 (sought, FL_MODE32);
3125 /* There are no ARM processors that support both APCS-26 and
3126 interworking. Therefore we force FL_MODE26 to be removed
3127 from insn_flags here (if it was set), so that the search
3128 below will always be able to find a compatible processor. */
3129 ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26);
3132 if (!ARM_FSET_IS_EMPTY (sought)
3133 && !(ARM_FSET_CPU_SUBSET (sought, insn_flags)))
3135 /* Try to locate a CPU type that supports all of the abilities
3136 of the default CPU, plus the extra abilities requested by
3137 the user. */
3138 for (sel = all_cores; sel->name != NULL; sel++)
3139 if (ARM_FSET_CPU_SUBSET (sought, sel->flags))
3140 break;
3142 if (sel->name == NULL)
3144 unsigned current_bit_count = 0;
3145 const struct processors * best_fit = NULL;
3147 /* Ideally we would like to issue an error message here
3148 saying that it was not possible to find a CPU compatible
3149 with the default CPU, but which also supports the command
3150 line options specified by the programmer, and so they
3151 ought to use the -mcpu=<name> command line option to
3152 override the default CPU type.
3154 If we cannot find a cpu that has both the
3155 characteristics of the default cpu and the given
3156 command line options we scan the array again looking
3157 for a best match. */
3158 for (sel = all_cores; sel->name != NULL; sel++)
3160 arm_feature_set required = ARM_FSET_EMPTY;
3161 ARM_FSET_UNION (required, sought, insn_flags);
3162 if (ARM_FSET_CPU_SUBSET (required, sel->flags))
3164 unsigned count;
3165 arm_feature_set flags;
3166 ARM_FSET_INTER (flags, sel->flags, insn_flags);
3167 count = feature_count (&flags);
3169 if (count >= current_bit_count)
3171 best_fit = sel;
3172 current_bit_count = count;
3176 gcc_assert (best_fit);
3177 sel = best_fit;
3180 arm_selected_cpu = sel;
3184 gcc_assert (arm_selected_cpu);
3185 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
3186 if (!arm_selected_tune)
3187 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3189 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
3190 insn_flags = arm_selected_cpu->flags;
3191 arm_base_arch = arm_selected_cpu->base_arch;
3193 arm_tune = arm_selected_tune->core;
3194 tune_flags = arm_selected_tune->flags;
3195 current_tune = arm_selected_tune->tune;
3197 /* TBD: Dwarf info for apcs frame is not handled yet. */
3198 if (TARGET_APCS_FRAME)
3199 flag_shrink_wrap = false;
3201 /* BPABI targets use linker tricks to allow interworking on cores
3202 without thumb support. */
3203 if (TARGET_INTERWORK
3204 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI))
3206 warning (0, "target CPU does not support interworking" );
3207 target_flags &= ~MASK_INTERWORK;
3210 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3212 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3213 target_flags |= MASK_APCS_FRAME;
3216 if (TARGET_POKE_FUNCTION_NAME)
3217 target_flags |= MASK_APCS_FRAME;
3219 if (TARGET_APCS_REENT && flag_pic)
3220 error ("-fpic and -mapcs-reent are incompatible");
3222 if (TARGET_APCS_REENT)
3223 warning (0, "APCS reentrant code not supported. Ignored");
3225 if (TARGET_APCS_FLOAT)
3226 warning (0, "passing floating point arguments in fp regs not yet supported");
3228 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3229 arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M);
3230 arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4);
3231 arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB));
3232 arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5);
3233 arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E);
3234 arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6);
3235 arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K);
3236 arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ);
3237 arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM);
3238 arm_arch6m = arm_arch6 && !arm_arch_notm;
3239 arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
3240 arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
3241 arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
3242 arm_arch8_1 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_1);
3243 arm_arch8_2 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_2);
3244 arm_arch_thumb1 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB);
3245 arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
3246 arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);
3248 arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED);
3249 arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG);
3250 arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF);
3251 arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE);
3252 arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT);
3253 arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
3254 arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
3255 arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
3256 arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
3257 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
3258 arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
3259 arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
3260 arm_fp16_inst = ARM_FSET_HAS_CPU2 (insn_flags, FL2_FP16INST);
3261 if (arm_fp16_inst)
3263 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3264 error ("selected fp16 options are incompatible.");
3265 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3268 /* V5 code we generate is completely interworking capable, so we turn off
3269 TARGET_INTERWORK here to avoid many tests later on. */
3271 /* XXX However, we must pass the right pre-processor defines to CPP
3272 or GLD can get confused. This is a hack. */
3273 if (TARGET_INTERWORK)
3274 arm_cpp_interwork = 1;
3276 if (arm_arch5)
3277 target_flags &= ~MASK_INTERWORK;
3279 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3280 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3282 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3283 error ("iwmmxt abi requires an iwmmxt capable cpu");
3285 if (!global_options_set.x_arm_fpu_index)
3287 const char *target_fpu_name;
3288 bool ok;
3290 #ifdef FPUTYPE_DEFAULT
3291 target_fpu_name = FPUTYPE_DEFAULT;
3292 #else
3293 target_fpu_name = "vfp";
3294 #endif
3296 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
3297 CL_TARGET);
3298 gcc_assert (ok);
3301 /* If soft-float is specified then don't use FPU. */
3302 if (TARGET_SOFT_FLOAT)
3303 arm_fpu_attr = FPU_NONE;
3304 else
3305 arm_fpu_attr = FPU_VFP;
3307 if (TARGET_AAPCS_BASED)
3309 if (TARGET_CALLER_INTERWORKING)
3310 error ("AAPCS does not support -mcaller-super-interworking");
3311 else
3312 if (TARGET_CALLEE_INTERWORKING)
3313 error ("AAPCS does not support -mcallee-super-interworking");
3316 /* __fp16 support currently assumes the core has ldrh. */
3317 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3318 sorry ("__fp16 and no ldrh");
3320 if (TARGET_AAPCS_BASED)
3322 if (arm_abi == ARM_ABI_IWMMXT)
3323 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3324 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3325 && TARGET_HARD_FLOAT)
3326 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3327 else
3328 arm_pcs_default = ARM_PCS_AAPCS;
3330 else
3332 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3333 sorry ("-mfloat-abi=hard and VFP");
3335 if (arm_abi == ARM_ABI_APCS)
3336 arm_pcs_default = ARM_PCS_APCS;
3337 else
3338 arm_pcs_default = ARM_PCS_ATPCS;
3341 /* For arm2/3 there is no need to do any scheduling if we are doing
3342 software floating-point. */
3343 if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32))
3344 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3346 /* Use the cp15 method if it is available. */
3347 if (target_thread_pointer == TP_AUTO)
3349 if (arm_arch6k && !TARGET_THUMB1)
3350 target_thread_pointer = TP_CP15;
3351 else
3352 target_thread_pointer = TP_SOFT;
3355 /* Override the default structure alignment for AAPCS ABI. */
3356 if (!global_options_set.x_arm_structure_size_boundary)
3358 if (TARGET_AAPCS_BASED)
3359 arm_structure_size_boundary = 8;
3361 else
3363 if (arm_structure_size_boundary != 8
3364 && arm_structure_size_boundary != 32
3365 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3367 if (ARM_DOUBLEWORD_ALIGN)
3368 warning (0,
3369 "structure size boundary can only be set to 8, 32 or 64");
3370 else
3371 warning (0, "structure size boundary can only be set to 8 or 32");
3372 arm_structure_size_boundary
3373 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3377 if (TARGET_VXWORKS_RTP)
3379 if (!global_options_set.x_arm_pic_data_is_text_relative)
3380 arm_pic_data_is_text_relative = 0;
3382 else if (flag_pic
3383 && !arm_pic_data_is_text_relative
3384 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3385 /* When text & data segments don't have a fixed displacement, the
3386 intended use is with a single, read only, pic base register.
3387 Unless the user explicitly requested not to do that, set
3388 it. */
3389 target_flags |= MASK_SINGLE_PIC_BASE;
3391 /* If stack checking is disabled, we can use r10 as the PIC register,
3392 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3393 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3395 if (TARGET_VXWORKS_RTP)
3396 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3397 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3400 if (flag_pic && TARGET_VXWORKS_RTP)
3401 arm_pic_register = 9;
3403 if (arm_pic_register_string != NULL)
3405 int pic_register = decode_reg_name (arm_pic_register_string);
3407 if (!flag_pic)
3408 warning (0, "-mpic-register= is useless without -fpic");
3410 /* Prevent the user from choosing an obviously stupid PIC register. */
3411 else if (pic_register < 0 || call_used_regs[pic_register]
3412 || pic_register == HARD_FRAME_POINTER_REGNUM
3413 || pic_register == STACK_POINTER_REGNUM
3414 || pic_register >= PC_REGNUM
3415 || (TARGET_VXWORKS_RTP
3416 && (unsigned int) pic_register != arm_pic_register))
3417 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3418 else
3419 arm_pic_register = pic_register;
3422 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3423 if (fix_cm3_ldrd == 2)
3425 if (arm_selected_cpu->core == cortexm3)
3426 fix_cm3_ldrd = 1;
3427 else
3428 fix_cm3_ldrd = 0;
3431 /* Hot/Cold partitioning is not currently supported, since we can't
3432 handle literal pool placement in that case. */
3433 if (flag_reorder_blocks_and_partition)
3435 inform (input_location,
3436 "-freorder-blocks-and-partition not supported on this architecture");
3437 flag_reorder_blocks_and_partition = 0;
3438 flag_reorder_blocks = 1;
3441 if (flag_pic)
3442 /* Hoisting PIC address calculations more aggressively provides a small,
3443 but measurable, size reduction for PIC code. Therefore, we decrease
3444 the bar for unrestricted expression hoisting to the cost of PIC address
3445 calculation, which is 2 instructions. */
3446 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3447 global_options.x_param_values,
3448 global_options_set.x_param_values);
3450 /* ARM EABI defaults to strict volatile bitfields. */
3451 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3452 && abi_version_at_least(2))
3453 flag_strict_volatile_bitfields = 1;
3455 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
3456 have deemed it beneficial (signified by setting
3457 prefetch.num_slots to 1 or more). */
3458 if (flag_prefetch_loop_arrays < 0
3459 && HAVE_prefetch
3460 && optimize >= 3
3461 && current_tune->prefetch.num_slots > 0)
3462 flag_prefetch_loop_arrays = 1;
3464 /* Set up parameters to be used in prefetching algorithm. Do not
3465 override the defaults unless we are tuning for a core we have
3466 researched values for. */
3467 if (current_tune->prefetch.num_slots > 0)
3468 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3469 current_tune->prefetch.num_slots,
3470 global_options.x_param_values,
3471 global_options_set.x_param_values);
3472 if (current_tune->prefetch.l1_cache_line_size >= 0)
3473 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3474 current_tune->prefetch.l1_cache_line_size,
3475 global_options.x_param_values,
3476 global_options_set.x_param_values);
3477 if (current_tune->prefetch.l1_cache_size >= 0)
3478 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3479 current_tune->prefetch.l1_cache_size,
3480 global_options.x_param_values,
3481 global_options_set.x_param_values);
3483 /* Use Neon to perform 64-bits operations rather than core
3484 registers. */
3485 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3486 if (use_neon_for_64bits == 1)
3487 prefer_neon_for_64bits = true;
3489 /* Use the alternative scheduling-pressure algorithm by default. */
3490 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3491 global_options.x_param_values,
3492 global_options_set.x_param_values);
3494 /* Look through ready list and all of queue for instructions
3495 relevant for L2 auto-prefetcher. */
3496 int param_sched_autopref_queue_depth;
3498 switch (current_tune->sched_autopref)
3500 case tune_params::SCHED_AUTOPREF_OFF:
3501 param_sched_autopref_queue_depth = -1;
3502 break;
3504 case tune_params::SCHED_AUTOPREF_RANK:
3505 param_sched_autopref_queue_depth = 0;
3506 break;
3508 case tune_params::SCHED_AUTOPREF_FULL:
3509 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3510 break;
3512 default:
3513 gcc_unreachable ();
3516 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3517 param_sched_autopref_queue_depth,
3518 global_options.x_param_values,
3519 global_options_set.x_param_values);
3521 /* Currently, for slow flash data, we just disable literal pools. We also
3522 disable it for pure-code. */
3523 if (target_slow_flash_data || target_pure_code)
3524 arm_disable_literal_pool = true;
3526 /* Disable scheduling fusion by default if it's not armv7 processor
3527 or doesn't prefer ldrd/strd. */
3528 if (flag_schedule_fusion == 2
3529 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3530 flag_schedule_fusion = 0;
3532 /* Need to remember initial options before they are overriden. */
3533 init_optimize = build_optimization_node (&global_options);
3535 arm_option_override_internal (&global_options, &global_options_set);
3536 arm_option_check_internal (&global_options);
3537 arm_option_params_internal ();
3539 /* Register global variables with the garbage collector. */
3540 arm_add_gc_roots ();
3542 /* Save the initial options in case the user does function specific
3543 options or #pragma target. */
3544 target_option_default_node = target_option_current_node
3545 = build_target_option_node (&global_options);
3547 /* Init initial mode for testing. */
3548 thumb_flipper = TARGET_THUMB;
3551 static void
3552 arm_add_gc_roots (void)
3554 gcc_obstack_init(&minipool_obstack);
3555 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3558 /* A table of known ARM exception types.
3559 For use with the interrupt function attribute. */
3561 typedef struct
3563 const char *const arg;
3564 const unsigned long return_value;
3566 isr_attribute_arg;
3568 static const isr_attribute_arg isr_attribute_args [] =
3570 { "IRQ", ARM_FT_ISR },
3571 { "irq", ARM_FT_ISR },
3572 { "FIQ", ARM_FT_FIQ },
3573 { "fiq", ARM_FT_FIQ },
3574 { "ABORT", ARM_FT_ISR },
3575 { "abort", ARM_FT_ISR },
3576 { "ABORT", ARM_FT_ISR },
3577 { "abort", ARM_FT_ISR },
3578 { "UNDEF", ARM_FT_EXCEPTION },
3579 { "undef", ARM_FT_EXCEPTION },
3580 { "SWI", ARM_FT_EXCEPTION },
3581 { "swi", ARM_FT_EXCEPTION },
3582 { NULL, ARM_FT_NORMAL }
3585 /* Returns the (interrupt) function type of the current
3586 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3588 static unsigned long
3589 arm_isr_value (tree argument)
3591 const isr_attribute_arg * ptr;
3592 const char * arg;
3594 if (!arm_arch_notm)
3595 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3597 /* No argument - default to IRQ. */
3598 if (argument == NULL_TREE)
3599 return ARM_FT_ISR;
3601 /* Get the value of the argument. */
3602 if (TREE_VALUE (argument) == NULL_TREE
3603 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3604 return ARM_FT_UNKNOWN;
3606 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3608 /* Check it against the list of known arguments. */
3609 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3610 if (streq (arg, ptr->arg))
3611 return ptr->return_value;
3613 /* An unrecognized interrupt type. */
3614 return ARM_FT_UNKNOWN;
3617 /* Computes the type of the current function. */
3619 static unsigned long
3620 arm_compute_func_type (void)
3622 unsigned long type = ARM_FT_UNKNOWN;
3623 tree a;
3624 tree attr;
3626 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3628 /* Decide if the current function is volatile. Such functions
3629 never return, and many memory cycles can be saved by not storing
3630 register values that will never be needed again. This optimization
3631 was added to speed up context switching in a kernel application. */
3632 if (optimize > 0
3633 && (TREE_NOTHROW (current_function_decl)
3634 || !(flag_unwind_tables
3635 || (flag_exceptions
3636 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3637 && TREE_THIS_VOLATILE (current_function_decl))
3638 type |= ARM_FT_VOLATILE;
3640 if (cfun->static_chain_decl != NULL)
3641 type |= ARM_FT_NESTED;
3643 attr = DECL_ATTRIBUTES (current_function_decl);
3645 a = lookup_attribute ("naked", attr);
3646 if (a != NULL_TREE)
3647 type |= ARM_FT_NAKED;
3649 a = lookup_attribute ("isr", attr);
3650 if (a == NULL_TREE)
3651 a = lookup_attribute ("interrupt", attr);
3653 if (a == NULL_TREE)
3654 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3655 else
3656 type |= arm_isr_value (TREE_VALUE (a));
3658 return type;
3661 /* Returns the type of the current function. */
3663 unsigned long
3664 arm_current_func_type (void)
3666 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3667 cfun->machine->func_type = arm_compute_func_type ();
3669 return cfun->machine->func_type;
3672 bool
3673 arm_allocate_stack_slots_for_args (void)
3675 /* Naked functions should not allocate stack slots for arguments. */
3676 return !IS_NAKED (arm_current_func_type ());
3679 static bool
3680 arm_warn_func_return (tree decl)
3682 /* Naked functions are implemented entirely in assembly, including the
3683 return sequence, so suppress warnings about this. */
3684 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3688 /* Output assembler code for a block containing the constant parts
3689 of a trampoline, leaving space for the variable parts.
3691 On the ARM, (if r8 is the static chain regnum, and remembering that
3692 referencing pc adds an offset of 8) the trampoline looks like:
3693 ldr r8, [pc, #0]
3694 ldr pc, [pc]
3695 .word static chain value
3696 .word function's address
3697 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3699 static void
3700 arm_asm_trampoline_template (FILE *f)
3702 fprintf (f, "\t.syntax unified\n");
3704 if (TARGET_ARM)
3706 fprintf (f, "\t.arm\n");
3707 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3708 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3710 else if (TARGET_THUMB2)
3712 fprintf (f, "\t.thumb\n");
3713 /* The Thumb-2 trampoline is similar to the arm implementation.
3714 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3715 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3716 STATIC_CHAIN_REGNUM, PC_REGNUM);
3717 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3719 else
3721 ASM_OUTPUT_ALIGN (f, 2);
3722 fprintf (f, "\t.code\t16\n");
3723 fprintf (f, ".Ltrampoline_start:\n");
3724 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3725 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3726 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3727 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3728 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3729 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3731 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3732 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3735 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3737 static void
3738 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3740 rtx fnaddr, mem, a_tramp;
3742 emit_block_move (m_tramp, assemble_trampoline_template (),
3743 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3745 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3746 emit_move_insn (mem, chain_value);
3748 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3749 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3750 emit_move_insn (mem, fnaddr);
3752 a_tramp = XEXP (m_tramp, 0);
3753 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3754 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3755 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3758 /* Thumb trampolines should be entered in thumb mode, so set
3759 the bottom bit of the address. */
3761 static rtx
3762 arm_trampoline_adjust_address (rtx addr)
3764 if (TARGET_THUMB)
3765 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3766 NULL, 0, OPTAB_LIB_WIDEN);
3767 return addr;
3770 /* Return 1 if it is possible to return using a single instruction.
3771 If SIBLING is non-null, this is a test for a return before a sibling
3772 call. SIBLING is the call insn, so we can examine its register usage. */
3775 use_return_insn (int iscond, rtx sibling)
3777 int regno;
3778 unsigned int func_type;
3779 unsigned long saved_int_regs;
3780 unsigned HOST_WIDE_INT stack_adjust;
3781 arm_stack_offsets *offsets;
3783 /* Never use a return instruction before reload has run. */
3784 if (!reload_completed)
3785 return 0;
3787 func_type = arm_current_func_type ();
3789 /* Naked, volatile and stack alignment functions need special
3790 consideration. */
3791 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3792 return 0;
3794 /* So do interrupt functions that use the frame pointer and Thumb
3795 interrupt functions. */
3796 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3797 return 0;
3799 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3800 && !optimize_function_for_size_p (cfun))
3801 return 0;
3803 offsets = arm_get_frame_offsets ();
3804 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3806 /* As do variadic functions. */
3807 if (crtl->args.pretend_args_size
3808 || cfun->machine->uses_anonymous_args
3809 /* Or if the function calls __builtin_eh_return () */
3810 || crtl->calls_eh_return
3811 /* Or if the function calls alloca */
3812 || cfun->calls_alloca
3813 /* Or if there is a stack adjustment. However, if the stack pointer
3814 is saved on the stack, we can use a pre-incrementing stack load. */
3815 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3816 && stack_adjust == 4))
3817 /* Or if the static chain register was saved above the frame, under the
3818 assumption that the stack pointer isn't saved on the stack. */
3819 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3820 && arm_compute_static_chain_stack_bytes() != 0))
3821 return 0;
3823 saved_int_regs = offsets->saved_regs_mask;
3825 /* Unfortunately, the insn
3827 ldmib sp, {..., sp, ...}
3829 triggers a bug on most SA-110 based devices, such that the stack
3830 pointer won't be correctly restored if the instruction takes a
3831 page fault. We work around this problem by popping r3 along with
3832 the other registers, since that is never slower than executing
3833 another instruction.
3835 We test for !arm_arch5 here, because code for any architecture
3836 less than this could potentially be run on one of the buggy
3837 chips. */
3838 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3840 /* Validate that r3 is a call-clobbered register (always true in
3841 the default abi) ... */
3842 if (!call_used_regs[3])
3843 return 0;
3845 /* ... that it isn't being used for a return value ... */
3846 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3847 return 0;
3849 /* ... or for a tail-call argument ... */
3850 if (sibling)
3852 gcc_assert (CALL_P (sibling));
3854 if (find_regno_fusage (sibling, USE, 3))
3855 return 0;
3858 /* ... and that there are no call-saved registers in r0-r2
3859 (always true in the default ABI). */
3860 if (saved_int_regs & 0x7)
3861 return 0;
3864 /* Can't be done if interworking with Thumb, and any registers have been
3865 stacked. */
3866 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3867 return 0;
3869 /* On StrongARM, conditional returns are expensive if they aren't
3870 taken and multiple registers have been stacked. */
3871 if (iscond && arm_tune_strongarm)
3873 /* Conditional return when just the LR is stored is a simple
3874 conditional-load instruction, that's not expensive. */
3875 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3876 return 0;
3878 if (flag_pic
3879 && arm_pic_register != INVALID_REGNUM
3880 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3881 return 0;
3884 /* If there are saved registers but the LR isn't saved, then we need
3885 two instructions for the return. */
3886 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3887 return 0;
3889 /* Can't be done if any of the VFP regs are pushed,
3890 since this also requires an insn. */
3891 if (TARGET_HARD_FLOAT)
3892 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3893 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3894 return 0;
3896 if (TARGET_REALLY_IWMMXT)
3897 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3898 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3899 return 0;
3901 return 1;
3904 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3905 shrink-wrapping if possible. This is the case if we need to emit a
3906 prologue, which we can test by looking at the offsets. */
3907 bool
3908 use_simple_return_p (void)
3910 arm_stack_offsets *offsets;
3912 offsets = arm_get_frame_offsets ();
3913 return offsets->outgoing_args != 0;
3916 /* Return TRUE if int I is a valid immediate ARM constant. */
3919 const_ok_for_arm (HOST_WIDE_INT i)
3921 int lowbit;
3923 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3924 be all zero, or all one. */
3925 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3926 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3927 != ((~(unsigned HOST_WIDE_INT) 0)
3928 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3929 return FALSE;
3931 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3933 /* Fast return for 0 and small values. We must do this for zero, since
3934 the code below can't handle that one case. */
3935 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3936 return TRUE;
3938 /* Get the number of trailing zeros. */
3939 lowbit = ffs((int) i) - 1;
3941 /* Only even shifts are allowed in ARM mode so round down to the
3942 nearest even number. */
3943 if (TARGET_ARM)
3944 lowbit &= ~1;
3946 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3947 return TRUE;
3949 if (TARGET_ARM)
3951 /* Allow rotated constants in ARM mode. */
3952 if (lowbit <= 4
3953 && ((i & ~0xc000003f) == 0
3954 || (i & ~0xf000000f) == 0
3955 || (i & ~0xfc000003) == 0))
3956 return TRUE;
3958 else
3960 HOST_WIDE_INT v;
3962 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3963 v = i & 0xff;
3964 v |= v << 16;
3965 if (i == v || i == (v | (v << 8)))
3966 return TRUE;
3968 /* Allow repeated pattern 0xXY00XY00. */
3969 v = i & 0xff00;
3970 v |= v << 16;
3971 if (i == v)
3972 return TRUE;
3975 return FALSE;
3978 /* Return true if I is a valid constant for the operation CODE. */
3980 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3982 if (const_ok_for_arm (i))
3983 return 1;
3985 switch (code)
3987 case SET:
3988 /* See if we can use movw. */
3989 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
3990 return 1;
3991 else
3992 /* Otherwise, try mvn. */
3993 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3995 case PLUS:
3996 /* See if we can use addw or subw. */
3997 if (TARGET_THUMB2
3998 && ((i & 0xfffff000) == 0
3999 || ((-i) & 0xfffff000) == 0))
4000 return 1;
4001 /* Fall through. */
4002 case COMPARE:
4003 case EQ:
4004 case NE:
4005 case GT:
4006 case LE:
4007 case LT:
4008 case GE:
4009 case GEU:
4010 case LTU:
4011 case GTU:
4012 case LEU:
4013 case UNORDERED:
4014 case ORDERED:
4015 case UNEQ:
4016 case UNGE:
4017 case UNLT:
4018 case UNGT:
4019 case UNLE:
4020 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4022 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4023 case XOR:
4024 return 0;
4026 case IOR:
4027 if (TARGET_THUMB2)
4028 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4029 return 0;
4031 case AND:
4032 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4034 default:
4035 gcc_unreachable ();
4039 /* Return true if I is a valid di mode constant for the operation CODE. */
4041 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4043 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4044 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4045 rtx hi = GEN_INT (hi_val);
4046 rtx lo = GEN_INT (lo_val);
4048 if (TARGET_THUMB1)
4049 return 0;
4051 switch (code)
4053 case AND:
4054 case IOR:
4055 case XOR:
4056 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4057 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4058 case PLUS:
4059 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4061 default:
4062 return 0;
4066 /* Emit a sequence of insns to handle a large constant.
4067 CODE is the code of the operation required, it can be any of SET, PLUS,
4068 IOR, AND, XOR, MINUS;
4069 MODE is the mode in which the operation is being performed;
4070 VAL is the integer to operate on;
4071 SOURCE is the other operand (a register, or a null-pointer for SET);
4072 SUBTARGETS means it is safe to create scratch registers if that will
4073 either produce a simpler sequence, or we will want to cse the values.
4074 Return value is the number of insns emitted. */
4076 /* ??? Tweak this for thumb2. */
4078 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4079 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4081 rtx cond;
4083 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4084 cond = COND_EXEC_TEST (PATTERN (insn));
4085 else
4086 cond = NULL_RTX;
4088 if (subtargets || code == SET
4089 || (REG_P (target) && REG_P (source)
4090 && REGNO (target) != REGNO (source)))
4092 /* After arm_reorg has been called, we can't fix up expensive
4093 constants by pushing them into memory so we must synthesize
4094 them in-line, regardless of the cost. This is only likely to
4095 be more costly on chips that have load delay slots and we are
4096 compiling without running the scheduler (so no splitting
4097 occurred before the final instruction emission).
4099 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4101 if (!cfun->machine->after_arm_reorg
4102 && !cond
4103 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4104 1, 0)
4105 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4106 + (code != SET))))
4108 if (code == SET)
4110 /* Currently SET is the only monadic value for CODE, all
4111 the rest are diadic. */
4112 if (TARGET_USE_MOVT)
4113 arm_emit_movpair (target, GEN_INT (val));
4114 else
4115 emit_set_insn (target, GEN_INT (val));
4117 return 1;
4119 else
4121 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4123 if (TARGET_USE_MOVT)
4124 arm_emit_movpair (temp, GEN_INT (val));
4125 else
4126 emit_set_insn (temp, GEN_INT (val));
4128 /* For MINUS, the value is subtracted from, since we never
4129 have subtraction of a constant. */
4130 if (code == MINUS)
4131 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4132 else
4133 emit_set_insn (target,
4134 gen_rtx_fmt_ee (code, mode, source, temp));
4135 return 2;
4140 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4144 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4145 ARM/THUMB2 immediates, and add up to VAL.
4146 Thr function return value gives the number of insns required. */
4147 static int
4148 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4149 struct four_ints *return_sequence)
4151 int best_consecutive_zeros = 0;
4152 int i;
4153 int best_start = 0;
4154 int insns1, insns2;
4155 struct four_ints tmp_sequence;
4157 /* If we aren't targeting ARM, the best place to start is always at
4158 the bottom, otherwise look more closely. */
4159 if (TARGET_ARM)
4161 for (i = 0; i < 32; i += 2)
4163 int consecutive_zeros = 0;
4165 if (!(val & (3 << i)))
4167 while ((i < 32) && !(val & (3 << i)))
4169 consecutive_zeros += 2;
4170 i += 2;
4172 if (consecutive_zeros > best_consecutive_zeros)
4174 best_consecutive_zeros = consecutive_zeros;
4175 best_start = i - consecutive_zeros;
4177 i -= 2;
4182 /* So long as it won't require any more insns to do so, it's
4183 desirable to emit a small constant (in bits 0...9) in the last
4184 insn. This way there is more chance that it can be combined with
4185 a later addressing insn to form a pre-indexed load or store
4186 operation. Consider:
4188 *((volatile int *)0xe0000100) = 1;
4189 *((volatile int *)0xe0000110) = 2;
4191 We want this to wind up as:
4193 mov rA, #0xe0000000
4194 mov rB, #1
4195 str rB, [rA, #0x100]
4196 mov rB, #2
4197 str rB, [rA, #0x110]
4199 rather than having to synthesize both large constants from scratch.
4201 Therefore, we calculate how many insns would be required to emit
4202 the constant starting from `best_start', and also starting from
4203 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4204 yield a shorter sequence, we may as well use zero. */
4205 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4206 if (best_start != 0
4207 && ((HOST_WIDE_INT_1U << best_start) < val))
4209 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4210 if (insns2 <= insns1)
4212 *return_sequence = tmp_sequence;
4213 insns1 = insns2;
4217 return insns1;
4220 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4221 static int
4222 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4223 struct four_ints *return_sequence, int i)
4225 int remainder = val & 0xffffffff;
4226 int insns = 0;
4228 /* Try and find a way of doing the job in either two or three
4229 instructions.
4231 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4232 location. We start at position I. This may be the MSB, or
4233 optimial_immediate_sequence may have positioned it at the largest block
4234 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4235 wrapping around to the top of the word when we drop off the bottom.
4236 In the worst case this code should produce no more than four insns.
4238 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4239 constants, shifted to any arbitrary location. We should always start
4240 at the MSB. */
4243 int end;
4244 unsigned int b1, b2, b3, b4;
4245 unsigned HOST_WIDE_INT result;
4246 int loc;
4248 gcc_assert (insns < 4);
4250 if (i <= 0)
4251 i += 32;
4253 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4254 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4256 loc = i;
4257 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4258 /* We can use addw/subw for the last 12 bits. */
4259 result = remainder;
4260 else
4262 /* Use an 8-bit shifted/rotated immediate. */
4263 end = i - 8;
4264 if (end < 0)
4265 end += 32;
4266 result = remainder & ((0x0ff << end)
4267 | ((i < end) ? (0xff >> (32 - end))
4268 : 0));
4269 i -= 8;
4272 else
4274 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4275 arbitrary shifts. */
4276 i -= TARGET_ARM ? 2 : 1;
4277 continue;
4280 /* Next, see if we can do a better job with a thumb2 replicated
4281 constant.
4283 We do it this way around to catch the cases like 0x01F001E0 where
4284 two 8-bit immediates would work, but a replicated constant would
4285 make it worse.
4287 TODO: 16-bit constants that don't clear all the bits, but still win.
4288 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4289 if (TARGET_THUMB2)
4291 b1 = (remainder & 0xff000000) >> 24;
4292 b2 = (remainder & 0x00ff0000) >> 16;
4293 b3 = (remainder & 0x0000ff00) >> 8;
4294 b4 = remainder & 0xff;
4296 if (loc > 24)
4298 /* The 8-bit immediate already found clears b1 (and maybe b2),
4299 but must leave b3 and b4 alone. */
4301 /* First try to find a 32-bit replicated constant that clears
4302 almost everything. We can assume that we can't do it in one,
4303 or else we wouldn't be here. */
4304 unsigned int tmp = b1 & b2 & b3 & b4;
4305 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4306 + (tmp << 24);
4307 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4308 + (tmp == b3) + (tmp == b4);
4309 if (tmp
4310 && (matching_bytes >= 3
4311 || (matching_bytes == 2
4312 && const_ok_for_op (remainder & ~tmp2, code))))
4314 /* At least 3 of the bytes match, and the fourth has at
4315 least as many bits set, or two of the bytes match
4316 and it will only require one more insn to finish. */
4317 result = tmp2;
4318 i = tmp != b1 ? 32
4319 : tmp != b2 ? 24
4320 : tmp != b3 ? 16
4321 : 8;
4324 /* Second, try to find a 16-bit replicated constant that can
4325 leave three of the bytes clear. If b2 or b4 is already
4326 zero, then we can. If the 8-bit from above would not
4327 clear b2 anyway, then we still win. */
4328 else if (b1 == b3 && (!b2 || !b4
4329 || (remainder & 0x00ff0000 & ~result)))
4331 result = remainder & 0xff00ff00;
4332 i = 24;
4335 else if (loc > 16)
4337 /* The 8-bit immediate already found clears b2 (and maybe b3)
4338 and we don't get here unless b1 is alredy clear, but it will
4339 leave b4 unchanged. */
4341 /* If we can clear b2 and b4 at once, then we win, since the
4342 8-bits couldn't possibly reach that far. */
4343 if (b2 == b4)
4345 result = remainder & 0x00ff00ff;
4346 i = 16;
4351 return_sequence->i[insns++] = result;
4352 remainder &= ~result;
4354 if (code == SET || code == MINUS)
4355 code = PLUS;
4357 while (remainder);
4359 return insns;
4362 /* Emit an instruction with the indicated PATTERN. If COND is
4363 non-NULL, conditionalize the execution of the instruction on COND
4364 being true. */
4366 static void
4367 emit_constant_insn (rtx cond, rtx pattern)
4369 if (cond)
4370 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4371 emit_insn (pattern);
4374 /* As above, but extra parameter GENERATE which, if clear, suppresses
4375 RTL generation. */
4377 static int
4378 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4379 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4380 int subtargets, int generate)
4382 int can_invert = 0;
4383 int can_negate = 0;
4384 int final_invert = 0;
4385 int i;
4386 int set_sign_bit_copies = 0;
4387 int clear_sign_bit_copies = 0;
4388 int clear_zero_bit_copies = 0;
4389 int set_zero_bit_copies = 0;
4390 int insns = 0, neg_insns, inv_insns;
4391 unsigned HOST_WIDE_INT temp1, temp2;
4392 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4393 struct four_ints *immediates;
4394 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4396 /* Find out which operations are safe for a given CODE. Also do a quick
4397 check for degenerate cases; these can occur when DImode operations
4398 are split. */
4399 switch (code)
4401 case SET:
4402 can_invert = 1;
4403 break;
4405 case PLUS:
4406 can_negate = 1;
4407 break;
4409 case IOR:
4410 if (remainder == 0xffffffff)
4412 if (generate)
4413 emit_constant_insn (cond,
4414 gen_rtx_SET (target,
4415 GEN_INT (ARM_SIGN_EXTEND (val))));
4416 return 1;
4419 if (remainder == 0)
4421 if (reload_completed && rtx_equal_p (target, source))
4422 return 0;
4424 if (generate)
4425 emit_constant_insn (cond, gen_rtx_SET (target, source));
4426 return 1;
4428 break;
4430 case AND:
4431 if (remainder == 0)
4433 if (generate)
4434 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4435 return 1;
4437 if (remainder == 0xffffffff)
4439 if (reload_completed && rtx_equal_p (target, source))
4440 return 0;
4441 if (generate)
4442 emit_constant_insn (cond, gen_rtx_SET (target, source));
4443 return 1;
4445 can_invert = 1;
4446 break;
4448 case XOR:
4449 if (remainder == 0)
4451 if (reload_completed && rtx_equal_p (target, source))
4452 return 0;
4453 if (generate)
4454 emit_constant_insn (cond, gen_rtx_SET (target, source));
4455 return 1;
4458 if (remainder == 0xffffffff)
4460 if (generate)
4461 emit_constant_insn (cond,
4462 gen_rtx_SET (target,
4463 gen_rtx_NOT (mode, source)));
4464 return 1;
4466 final_invert = 1;
4467 break;
4469 case MINUS:
4470 /* We treat MINUS as (val - source), since (source - val) is always
4471 passed as (source + (-val)). */
4472 if (remainder == 0)
4474 if (generate)
4475 emit_constant_insn (cond,
4476 gen_rtx_SET (target,
4477 gen_rtx_NEG (mode, source)));
4478 return 1;
4480 if (const_ok_for_arm (val))
4482 if (generate)
4483 emit_constant_insn (cond,
4484 gen_rtx_SET (target,
4485 gen_rtx_MINUS (mode, GEN_INT (val),
4486 source)));
4487 return 1;
4490 break;
4492 default:
4493 gcc_unreachable ();
4496 /* If we can do it in one insn get out quickly. */
4497 if (const_ok_for_op (val, code))
4499 if (generate)
4500 emit_constant_insn (cond,
4501 gen_rtx_SET (target,
4502 (source
4503 ? gen_rtx_fmt_ee (code, mode, source,
4504 GEN_INT (val))
4505 : GEN_INT (val))));
4506 return 1;
4509 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4510 insn. */
4511 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4512 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4514 if (generate)
4516 if (mode == SImode && i == 16)
4517 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4518 smaller insn. */
4519 emit_constant_insn (cond,
4520 gen_zero_extendhisi2
4521 (target, gen_lowpart (HImode, source)));
4522 else
4523 /* Extz only supports SImode, but we can coerce the operands
4524 into that mode. */
4525 emit_constant_insn (cond,
4526 gen_extzv_t2 (gen_lowpart (SImode, target),
4527 gen_lowpart (SImode, source),
4528 GEN_INT (i), const0_rtx));
4531 return 1;
4534 /* Calculate a few attributes that may be useful for specific
4535 optimizations. */
4536 /* Count number of leading zeros. */
4537 for (i = 31; i >= 0; i--)
4539 if ((remainder & (1 << i)) == 0)
4540 clear_sign_bit_copies++;
4541 else
4542 break;
4545 /* Count number of leading 1's. */
4546 for (i = 31; i >= 0; i--)
4548 if ((remainder & (1 << i)) != 0)
4549 set_sign_bit_copies++;
4550 else
4551 break;
4554 /* Count number of trailing zero's. */
4555 for (i = 0; i <= 31; i++)
4557 if ((remainder & (1 << i)) == 0)
4558 clear_zero_bit_copies++;
4559 else
4560 break;
4563 /* Count number of trailing 1's. */
4564 for (i = 0; i <= 31; i++)
4566 if ((remainder & (1 << i)) != 0)
4567 set_zero_bit_copies++;
4568 else
4569 break;
4572 switch (code)
4574 case SET:
4575 /* See if we can do this by sign_extending a constant that is known
4576 to be negative. This is a good, way of doing it, since the shift
4577 may well merge into a subsequent insn. */
4578 if (set_sign_bit_copies > 1)
4580 if (const_ok_for_arm
4581 (temp1 = ARM_SIGN_EXTEND (remainder
4582 << (set_sign_bit_copies - 1))))
4584 if (generate)
4586 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4587 emit_constant_insn (cond,
4588 gen_rtx_SET (new_src, GEN_INT (temp1)));
4589 emit_constant_insn (cond,
4590 gen_ashrsi3 (target, new_src,
4591 GEN_INT (set_sign_bit_copies - 1)));
4593 return 2;
4595 /* For an inverted constant, we will need to set the low bits,
4596 these will be shifted out of harm's way. */
4597 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4598 if (const_ok_for_arm (~temp1))
4600 if (generate)
4602 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4603 emit_constant_insn (cond,
4604 gen_rtx_SET (new_src, GEN_INT (temp1)));
4605 emit_constant_insn (cond,
4606 gen_ashrsi3 (target, new_src,
4607 GEN_INT (set_sign_bit_copies - 1)));
4609 return 2;
4613 /* See if we can calculate the value as the difference between two
4614 valid immediates. */
4615 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4617 int topshift = clear_sign_bit_copies & ~1;
4619 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4620 & (0xff000000 >> topshift));
4622 /* If temp1 is zero, then that means the 9 most significant
4623 bits of remainder were 1 and we've caused it to overflow.
4624 When topshift is 0 we don't need to do anything since we
4625 can borrow from 'bit 32'. */
4626 if (temp1 == 0 && topshift != 0)
4627 temp1 = 0x80000000 >> (topshift - 1);
4629 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4631 if (const_ok_for_arm (temp2))
4633 if (generate)
4635 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4636 emit_constant_insn (cond,
4637 gen_rtx_SET (new_src, GEN_INT (temp1)));
4638 emit_constant_insn (cond,
4639 gen_addsi3 (target, new_src,
4640 GEN_INT (-temp2)));
4643 return 2;
4647 /* See if we can generate this by setting the bottom (or the top)
4648 16 bits, and then shifting these into the other half of the
4649 word. We only look for the simplest cases, to do more would cost
4650 too much. Be careful, however, not to generate this when the
4651 alternative would take fewer insns. */
4652 if (val & 0xffff0000)
4654 temp1 = remainder & 0xffff0000;
4655 temp2 = remainder & 0x0000ffff;
4657 /* Overlaps outside this range are best done using other methods. */
4658 for (i = 9; i < 24; i++)
4660 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4661 && !const_ok_for_arm (temp2))
4663 rtx new_src = (subtargets
4664 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4665 : target);
4666 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4667 source, subtargets, generate);
4668 source = new_src;
4669 if (generate)
4670 emit_constant_insn
4671 (cond,
4672 gen_rtx_SET
4673 (target,
4674 gen_rtx_IOR (mode,
4675 gen_rtx_ASHIFT (mode, source,
4676 GEN_INT (i)),
4677 source)));
4678 return insns + 1;
4682 /* Don't duplicate cases already considered. */
4683 for (i = 17; i < 24; i++)
4685 if (((temp1 | (temp1 >> i)) == remainder)
4686 && !const_ok_for_arm (temp1))
4688 rtx new_src = (subtargets
4689 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4690 : target);
4691 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4692 source, subtargets, generate);
4693 source = new_src;
4694 if (generate)
4695 emit_constant_insn
4696 (cond,
4697 gen_rtx_SET (target,
4698 gen_rtx_IOR
4699 (mode,
4700 gen_rtx_LSHIFTRT (mode, source,
4701 GEN_INT (i)),
4702 source)));
4703 return insns + 1;
4707 break;
4709 case IOR:
4710 case XOR:
4711 /* If we have IOR or XOR, and the constant can be loaded in a
4712 single instruction, and we can find a temporary to put it in,
4713 then this can be done in two instructions instead of 3-4. */
4714 if (subtargets
4715 /* TARGET can't be NULL if SUBTARGETS is 0 */
4716 || (reload_completed && !reg_mentioned_p (target, source)))
4718 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4720 if (generate)
4722 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4724 emit_constant_insn (cond,
4725 gen_rtx_SET (sub, GEN_INT (val)));
4726 emit_constant_insn (cond,
4727 gen_rtx_SET (target,
4728 gen_rtx_fmt_ee (code, mode,
4729 source, sub)));
4731 return 2;
4735 if (code == XOR)
4736 break;
4738 /* Convert.
4739 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4740 and the remainder 0s for e.g. 0xfff00000)
4741 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4743 This can be done in 2 instructions by using shifts with mov or mvn.
4744 e.g. for
4745 x = x | 0xfff00000;
4746 we generate.
4747 mvn r0, r0, asl #12
4748 mvn r0, r0, lsr #12 */
4749 if (set_sign_bit_copies > 8
4750 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4752 if (generate)
4754 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4755 rtx shift = GEN_INT (set_sign_bit_copies);
4757 emit_constant_insn
4758 (cond,
4759 gen_rtx_SET (sub,
4760 gen_rtx_NOT (mode,
4761 gen_rtx_ASHIFT (mode,
4762 source,
4763 shift))));
4764 emit_constant_insn
4765 (cond,
4766 gen_rtx_SET (target,
4767 gen_rtx_NOT (mode,
4768 gen_rtx_LSHIFTRT (mode, sub,
4769 shift))));
4771 return 2;
4774 /* Convert
4775 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4777 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4779 For eg. r0 = r0 | 0xfff
4780 mvn r0, r0, lsr #12
4781 mvn r0, r0, asl #12
4784 if (set_zero_bit_copies > 8
4785 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4787 if (generate)
4789 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4790 rtx shift = GEN_INT (set_zero_bit_copies);
4792 emit_constant_insn
4793 (cond,
4794 gen_rtx_SET (sub,
4795 gen_rtx_NOT (mode,
4796 gen_rtx_LSHIFTRT (mode,
4797 source,
4798 shift))));
4799 emit_constant_insn
4800 (cond,
4801 gen_rtx_SET (target,
4802 gen_rtx_NOT (mode,
4803 gen_rtx_ASHIFT (mode, sub,
4804 shift))));
4806 return 2;
4809 /* This will never be reached for Thumb2 because orn is a valid
4810 instruction. This is for Thumb1 and the ARM 32 bit cases.
4812 x = y | constant (such that ~constant is a valid constant)
4813 Transform this to
4814 x = ~(~y & ~constant).
4816 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4818 if (generate)
4820 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4821 emit_constant_insn (cond,
4822 gen_rtx_SET (sub,
4823 gen_rtx_NOT (mode, source)));
4824 source = sub;
4825 if (subtargets)
4826 sub = gen_reg_rtx (mode);
4827 emit_constant_insn (cond,
4828 gen_rtx_SET (sub,
4829 gen_rtx_AND (mode, source,
4830 GEN_INT (temp1))));
4831 emit_constant_insn (cond,
4832 gen_rtx_SET (target,
4833 gen_rtx_NOT (mode, sub)));
4835 return 3;
4837 break;
4839 case AND:
4840 /* See if two shifts will do 2 or more insn's worth of work. */
4841 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4843 HOST_WIDE_INT shift_mask = ((0xffffffff
4844 << (32 - clear_sign_bit_copies))
4845 & 0xffffffff);
4847 if ((remainder | shift_mask) != 0xffffffff)
4849 HOST_WIDE_INT new_val
4850 = ARM_SIGN_EXTEND (remainder | shift_mask);
4852 if (generate)
4854 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4855 insns = arm_gen_constant (AND, SImode, cond, new_val,
4856 new_src, source, subtargets, 1);
4857 source = new_src;
4859 else
4861 rtx targ = subtargets ? NULL_RTX : target;
4862 insns = arm_gen_constant (AND, mode, cond, new_val,
4863 targ, source, subtargets, 0);
4867 if (generate)
4869 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4870 rtx shift = GEN_INT (clear_sign_bit_copies);
4872 emit_insn (gen_ashlsi3 (new_src, source, shift));
4873 emit_insn (gen_lshrsi3 (target, new_src, shift));
4876 return insns + 2;
4879 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4881 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4883 if ((remainder | shift_mask) != 0xffffffff)
4885 HOST_WIDE_INT new_val
4886 = ARM_SIGN_EXTEND (remainder | shift_mask);
4887 if (generate)
4889 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4891 insns = arm_gen_constant (AND, mode, cond, new_val,
4892 new_src, source, subtargets, 1);
4893 source = new_src;
4895 else
4897 rtx targ = subtargets ? NULL_RTX : target;
4899 insns = arm_gen_constant (AND, mode, cond, new_val,
4900 targ, source, subtargets, 0);
4904 if (generate)
4906 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4907 rtx shift = GEN_INT (clear_zero_bit_copies);
4909 emit_insn (gen_lshrsi3 (new_src, source, shift));
4910 emit_insn (gen_ashlsi3 (target, new_src, shift));
4913 return insns + 2;
4916 break;
4918 default:
4919 break;
4922 /* Calculate what the instruction sequences would be if we generated it
4923 normally, negated, or inverted. */
4924 if (code == AND)
4925 /* AND cannot be split into multiple insns, so invert and use BIC. */
4926 insns = 99;
4927 else
4928 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4930 if (can_negate)
4931 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4932 &neg_immediates);
4933 else
4934 neg_insns = 99;
4936 if (can_invert || final_invert)
4937 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4938 &inv_immediates);
4939 else
4940 inv_insns = 99;
4942 immediates = &pos_immediates;
4944 /* Is the negated immediate sequence more efficient? */
4945 if (neg_insns < insns && neg_insns <= inv_insns)
4947 insns = neg_insns;
4948 immediates = &neg_immediates;
4950 else
4951 can_negate = 0;
4953 /* Is the inverted immediate sequence more efficient?
4954 We must allow for an extra NOT instruction for XOR operations, although
4955 there is some chance that the final 'mvn' will get optimized later. */
4956 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4958 insns = inv_insns;
4959 immediates = &inv_immediates;
4961 else
4963 can_invert = 0;
4964 final_invert = 0;
4967 /* Now output the chosen sequence as instructions. */
4968 if (generate)
4970 for (i = 0; i < insns; i++)
4972 rtx new_src, temp1_rtx;
4974 temp1 = immediates->i[i];
4976 if (code == SET || code == MINUS)
4977 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4978 else if ((final_invert || i < (insns - 1)) && subtargets)
4979 new_src = gen_reg_rtx (mode);
4980 else
4981 new_src = target;
4983 if (can_invert)
4984 temp1 = ~temp1;
4985 else if (can_negate)
4986 temp1 = -temp1;
4988 temp1 = trunc_int_for_mode (temp1, mode);
4989 temp1_rtx = GEN_INT (temp1);
4991 if (code == SET)
4993 else if (code == MINUS)
4994 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4995 else
4996 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4998 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4999 source = new_src;
5001 if (code == SET)
5003 can_negate = can_invert;
5004 can_invert = 0;
5005 code = PLUS;
5007 else if (code == MINUS)
5008 code = PLUS;
5012 if (final_invert)
5014 if (generate)
5015 emit_constant_insn (cond, gen_rtx_SET (target,
5016 gen_rtx_NOT (mode, source)));
5017 insns++;
5020 return insns;
5023 /* Canonicalize a comparison so that we are more likely to recognize it.
5024 This can be done for a few constant compares, where we can make the
5025 immediate value easier to load. */
5027 static void
5028 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5029 bool op0_preserve_value)
5031 machine_mode mode;
5032 unsigned HOST_WIDE_INT i, maxval;
5034 mode = GET_MODE (*op0);
5035 if (mode == VOIDmode)
5036 mode = GET_MODE (*op1);
5038 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5040 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5041 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5042 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5043 for GTU/LEU in Thumb mode. */
5044 if (mode == DImode)
5047 if (*code == GT || *code == LE
5048 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5050 /* Missing comparison. First try to use an available
5051 comparison. */
5052 if (CONST_INT_P (*op1))
5054 i = INTVAL (*op1);
5055 switch (*code)
5057 case GT:
5058 case LE:
5059 if (i != maxval
5060 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5062 *op1 = GEN_INT (i + 1);
5063 *code = *code == GT ? GE : LT;
5064 return;
5066 break;
5067 case GTU:
5068 case LEU:
5069 if (i != ~((unsigned HOST_WIDE_INT) 0)
5070 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5072 *op1 = GEN_INT (i + 1);
5073 *code = *code == GTU ? GEU : LTU;
5074 return;
5076 break;
5077 default:
5078 gcc_unreachable ();
5082 /* If that did not work, reverse the condition. */
5083 if (!op0_preserve_value)
5085 std::swap (*op0, *op1);
5086 *code = (int)swap_condition ((enum rtx_code)*code);
5089 return;
5092 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5093 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5094 to facilitate possible combining with a cmp into 'ands'. */
5095 if (mode == SImode
5096 && GET_CODE (*op0) == ZERO_EXTEND
5097 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5098 && GET_MODE (XEXP (*op0, 0)) == QImode
5099 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5100 && subreg_lowpart_p (XEXP (*op0, 0))
5101 && *op1 == const0_rtx)
5102 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5103 GEN_INT (255));
5105 /* Comparisons smaller than DImode. Only adjust comparisons against
5106 an out-of-range constant. */
5107 if (!CONST_INT_P (*op1)
5108 || const_ok_for_arm (INTVAL (*op1))
5109 || const_ok_for_arm (- INTVAL (*op1)))
5110 return;
5112 i = INTVAL (*op1);
5114 switch (*code)
5116 case EQ:
5117 case NE:
5118 return;
5120 case GT:
5121 case LE:
5122 if (i != maxval
5123 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5125 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5126 *code = *code == GT ? GE : LT;
5127 return;
5129 break;
5131 case GE:
5132 case LT:
5133 if (i != ~maxval
5134 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5136 *op1 = GEN_INT (i - 1);
5137 *code = *code == GE ? GT : LE;
5138 return;
5140 break;
5142 case GTU:
5143 case LEU:
5144 if (i != ~((unsigned HOST_WIDE_INT) 0)
5145 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5147 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5148 *code = *code == GTU ? GEU : LTU;
5149 return;
5151 break;
5153 case GEU:
5154 case LTU:
5155 if (i != 0
5156 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5158 *op1 = GEN_INT (i - 1);
5159 *code = *code == GEU ? GTU : LEU;
5160 return;
5162 break;
5164 default:
5165 gcc_unreachable ();
5170 /* Define how to find the value returned by a function. */
5172 static rtx
5173 arm_function_value(const_tree type, const_tree func,
5174 bool outgoing ATTRIBUTE_UNUSED)
5176 machine_mode mode;
5177 int unsignedp ATTRIBUTE_UNUSED;
5178 rtx r ATTRIBUTE_UNUSED;
5180 mode = TYPE_MODE (type);
5182 if (TARGET_AAPCS_BASED)
5183 return aapcs_allocate_return_reg (mode, type, func);
5185 /* Promote integer types. */
5186 if (INTEGRAL_TYPE_P (type))
5187 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5189 /* Promotes small structs returned in a register to full-word size
5190 for big-endian AAPCS. */
5191 if (arm_return_in_msb (type))
5193 HOST_WIDE_INT size = int_size_in_bytes (type);
5194 if (size % UNITS_PER_WORD != 0)
5196 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5197 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5201 return arm_libcall_value_1 (mode);
5204 /* libcall hashtable helpers. */
5206 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5208 static inline hashval_t hash (const rtx_def *);
5209 static inline bool equal (const rtx_def *, const rtx_def *);
5210 static inline void remove (rtx_def *);
5213 inline bool
5214 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5216 return rtx_equal_p (p1, p2);
5219 inline hashval_t
5220 libcall_hasher::hash (const rtx_def *p1)
5222 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5225 typedef hash_table<libcall_hasher> libcall_table_type;
5227 static void
5228 add_libcall (libcall_table_type *htab, rtx libcall)
5230 *htab->find_slot (libcall, INSERT) = libcall;
5233 static bool
5234 arm_libcall_uses_aapcs_base (const_rtx libcall)
5236 static bool init_done = false;
5237 static libcall_table_type *libcall_htab = NULL;
5239 if (!init_done)
5241 init_done = true;
5243 libcall_htab = new libcall_table_type (31);
5244 add_libcall (libcall_htab,
5245 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5246 add_libcall (libcall_htab,
5247 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5248 add_libcall (libcall_htab,
5249 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5250 add_libcall (libcall_htab,
5251 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5253 add_libcall (libcall_htab,
5254 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5255 add_libcall (libcall_htab,
5256 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5257 add_libcall (libcall_htab,
5258 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5259 add_libcall (libcall_htab,
5260 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5262 add_libcall (libcall_htab,
5263 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5264 add_libcall (libcall_htab,
5265 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5266 add_libcall (libcall_htab,
5267 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5268 add_libcall (libcall_htab,
5269 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5270 add_libcall (libcall_htab,
5271 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5272 add_libcall (libcall_htab,
5273 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5274 add_libcall (libcall_htab,
5275 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5276 add_libcall (libcall_htab,
5277 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5279 /* Values from double-precision helper functions are returned in core
5280 registers if the selected core only supports single-precision
5281 arithmetic, even if we are using the hard-float ABI. The same is
5282 true for single-precision helpers, but we will never be using the
5283 hard-float ABI on a CPU which doesn't support single-precision
5284 operations in hardware. */
5285 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5286 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5287 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5288 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5289 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5290 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5291 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5292 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5293 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5294 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5295 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5296 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5297 SFmode));
5298 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5299 DFmode));
5302 return libcall && libcall_htab->find (libcall) != NULL;
5305 static rtx
5306 arm_libcall_value_1 (machine_mode mode)
5308 if (TARGET_AAPCS_BASED)
5309 return aapcs_libcall_value (mode);
5310 else if (TARGET_IWMMXT_ABI
5311 && arm_vector_mode_supported_p (mode))
5312 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5313 else
5314 return gen_rtx_REG (mode, ARG_REGISTER (1));
5317 /* Define how to find the value returned by a library function
5318 assuming the value has mode MODE. */
5320 static rtx
5321 arm_libcall_value (machine_mode mode, const_rtx libcall)
5323 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5324 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5326 /* The following libcalls return their result in integer registers,
5327 even though they return a floating point value. */
5328 if (arm_libcall_uses_aapcs_base (libcall))
5329 return gen_rtx_REG (mode, ARG_REGISTER(1));
5333 return arm_libcall_value_1 (mode);
5336 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5338 static bool
5339 arm_function_value_regno_p (const unsigned int regno)
5341 if (regno == ARG_REGISTER (1)
5342 || (TARGET_32BIT
5343 && TARGET_AAPCS_BASED
5344 && TARGET_HARD_FLOAT
5345 && regno == FIRST_VFP_REGNUM)
5346 || (TARGET_IWMMXT_ABI
5347 && regno == FIRST_IWMMXT_REGNUM))
5348 return true;
5350 return false;
5353 /* Determine the amount of memory needed to store the possible return
5354 registers of an untyped call. */
5356 arm_apply_result_size (void)
5358 int size = 16;
5360 if (TARGET_32BIT)
5362 if (TARGET_HARD_FLOAT_ABI)
5363 size += 32;
5364 if (TARGET_IWMMXT_ABI)
5365 size += 8;
5368 return size;
5371 /* Decide whether TYPE should be returned in memory (true)
5372 or in a register (false). FNTYPE is the type of the function making
5373 the call. */
5374 static bool
5375 arm_return_in_memory (const_tree type, const_tree fntype)
5377 HOST_WIDE_INT size;
5379 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5381 if (TARGET_AAPCS_BASED)
5383 /* Simple, non-aggregate types (ie not including vectors and
5384 complex) are always returned in a register (or registers).
5385 We don't care about which register here, so we can short-cut
5386 some of the detail. */
5387 if (!AGGREGATE_TYPE_P (type)
5388 && TREE_CODE (type) != VECTOR_TYPE
5389 && TREE_CODE (type) != COMPLEX_TYPE)
5390 return false;
5392 /* Any return value that is no larger than one word can be
5393 returned in r0. */
5394 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5395 return false;
5397 /* Check any available co-processors to see if they accept the
5398 type as a register candidate (VFP, for example, can return
5399 some aggregates in consecutive registers). These aren't
5400 available if the call is variadic. */
5401 if (aapcs_select_return_coproc (type, fntype) >= 0)
5402 return false;
5404 /* Vector values should be returned using ARM registers, not
5405 memory (unless they're over 16 bytes, which will break since
5406 we only have four call-clobbered registers to play with). */
5407 if (TREE_CODE (type) == VECTOR_TYPE)
5408 return (size < 0 || size > (4 * UNITS_PER_WORD));
5410 /* The rest go in memory. */
5411 return true;
5414 if (TREE_CODE (type) == VECTOR_TYPE)
5415 return (size < 0 || size > (4 * UNITS_PER_WORD));
5417 if (!AGGREGATE_TYPE_P (type) &&
5418 (TREE_CODE (type) != VECTOR_TYPE))
5419 /* All simple types are returned in registers. */
5420 return false;
5422 if (arm_abi != ARM_ABI_APCS)
5424 /* ATPCS and later return aggregate types in memory only if they are
5425 larger than a word (or are variable size). */
5426 return (size < 0 || size > UNITS_PER_WORD);
5429 /* For the arm-wince targets we choose to be compatible with Microsoft's
5430 ARM and Thumb compilers, which always return aggregates in memory. */
5431 #ifndef ARM_WINCE
5432 /* All structures/unions bigger than one word are returned in memory.
5433 Also catch the case where int_size_in_bytes returns -1. In this case
5434 the aggregate is either huge or of variable size, and in either case
5435 we will want to return it via memory and not in a register. */
5436 if (size < 0 || size > UNITS_PER_WORD)
5437 return true;
5439 if (TREE_CODE (type) == RECORD_TYPE)
5441 tree field;
5443 /* For a struct the APCS says that we only return in a register
5444 if the type is 'integer like' and every addressable element
5445 has an offset of zero. For practical purposes this means
5446 that the structure can have at most one non bit-field element
5447 and that this element must be the first one in the structure. */
5449 /* Find the first field, ignoring non FIELD_DECL things which will
5450 have been created by C++. */
5451 for (field = TYPE_FIELDS (type);
5452 field && TREE_CODE (field) != FIELD_DECL;
5453 field = DECL_CHAIN (field))
5454 continue;
5456 if (field == NULL)
5457 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5459 /* Check that the first field is valid for returning in a register. */
5461 /* ... Floats are not allowed */
5462 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5463 return true;
5465 /* ... Aggregates that are not themselves valid for returning in
5466 a register are not allowed. */
5467 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5468 return true;
5470 /* Now check the remaining fields, if any. Only bitfields are allowed,
5471 since they are not addressable. */
5472 for (field = DECL_CHAIN (field);
5473 field;
5474 field = DECL_CHAIN (field))
5476 if (TREE_CODE (field) != FIELD_DECL)
5477 continue;
5479 if (!DECL_BIT_FIELD_TYPE (field))
5480 return true;
5483 return false;
5486 if (TREE_CODE (type) == UNION_TYPE)
5488 tree field;
5490 /* Unions can be returned in registers if every element is
5491 integral, or can be returned in an integer register. */
5492 for (field = TYPE_FIELDS (type);
5493 field;
5494 field = DECL_CHAIN (field))
5496 if (TREE_CODE (field) != FIELD_DECL)
5497 continue;
5499 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5500 return true;
5502 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5503 return true;
5506 return false;
5508 #endif /* not ARM_WINCE */
5510 /* Return all other types in memory. */
5511 return true;
5514 const struct pcs_attribute_arg
5516 const char *arg;
5517 enum arm_pcs value;
5518 } pcs_attribute_args[] =
5520 {"aapcs", ARM_PCS_AAPCS},
5521 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5522 #if 0
5523 /* We could recognize these, but changes would be needed elsewhere
5524 * to implement them. */
5525 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5526 {"atpcs", ARM_PCS_ATPCS},
5527 {"apcs", ARM_PCS_APCS},
5528 #endif
5529 {NULL, ARM_PCS_UNKNOWN}
5532 static enum arm_pcs
5533 arm_pcs_from_attribute (tree attr)
5535 const struct pcs_attribute_arg *ptr;
5536 const char *arg;
5538 /* Get the value of the argument. */
5539 if (TREE_VALUE (attr) == NULL_TREE
5540 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5541 return ARM_PCS_UNKNOWN;
5543 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5545 /* Check it against the list of known arguments. */
5546 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5547 if (streq (arg, ptr->arg))
5548 return ptr->value;
5550 /* An unrecognized interrupt type. */
5551 return ARM_PCS_UNKNOWN;
5554 /* Get the PCS variant to use for this call. TYPE is the function's type
5555 specification, DECL is the specific declartion. DECL may be null if
5556 the call could be indirect or if this is a library call. */
5557 static enum arm_pcs
5558 arm_get_pcs_model (const_tree type, const_tree decl)
5560 bool user_convention = false;
5561 enum arm_pcs user_pcs = arm_pcs_default;
5562 tree attr;
5564 gcc_assert (type);
5566 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5567 if (attr)
5569 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5570 user_convention = true;
5573 if (TARGET_AAPCS_BASED)
5575 /* Detect varargs functions. These always use the base rules
5576 (no argument is ever a candidate for a co-processor
5577 register). */
5578 bool base_rules = stdarg_p (type);
5580 if (user_convention)
5582 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5583 sorry ("non-AAPCS derived PCS variant");
5584 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5585 error ("variadic functions must use the base AAPCS variant");
5588 if (base_rules)
5589 return ARM_PCS_AAPCS;
5590 else if (user_convention)
5591 return user_pcs;
5592 else if (decl && flag_unit_at_a_time)
5594 /* Local functions never leak outside this compilation unit,
5595 so we are free to use whatever conventions are
5596 appropriate. */
5597 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5598 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5599 if (i && i->local)
5600 return ARM_PCS_AAPCS_LOCAL;
5603 else if (user_convention && user_pcs != arm_pcs_default)
5604 sorry ("PCS variant");
5606 /* For everything else we use the target's default. */
5607 return arm_pcs_default;
5611 static void
5612 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5613 const_tree fntype ATTRIBUTE_UNUSED,
5614 rtx libcall ATTRIBUTE_UNUSED,
5615 const_tree fndecl ATTRIBUTE_UNUSED)
5617 /* Record the unallocated VFP registers. */
5618 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5619 pcum->aapcs_vfp_reg_alloc = 0;
5622 /* Walk down the type tree of TYPE counting consecutive base elements.
5623 If *MODEP is VOIDmode, then set it to the first valid floating point
5624 type. If a non-floating point type is found, or if a floating point
5625 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5626 otherwise return the count in the sub-tree. */
5627 static int
5628 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5630 machine_mode mode;
5631 HOST_WIDE_INT size;
5633 switch (TREE_CODE (type))
5635 case REAL_TYPE:
5636 mode = TYPE_MODE (type);
5637 if (mode != DFmode && mode != SFmode && mode != HFmode)
5638 return -1;
5640 if (*modep == VOIDmode)
5641 *modep = mode;
5643 if (*modep == mode)
5644 return 1;
5646 break;
5648 case COMPLEX_TYPE:
5649 mode = TYPE_MODE (TREE_TYPE (type));
5650 if (mode != DFmode && mode != SFmode)
5651 return -1;
5653 if (*modep == VOIDmode)
5654 *modep = mode;
5656 if (*modep == mode)
5657 return 2;
5659 break;
5661 case VECTOR_TYPE:
5662 /* Use V2SImode and V4SImode as representatives of all 64-bit
5663 and 128-bit vector types, whether or not those modes are
5664 supported with the present options. */
5665 size = int_size_in_bytes (type);
5666 switch (size)
5668 case 8:
5669 mode = V2SImode;
5670 break;
5671 case 16:
5672 mode = V4SImode;
5673 break;
5674 default:
5675 return -1;
5678 if (*modep == VOIDmode)
5679 *modep = mode;
5681 /* Vector modes are considered to be opaque: two vectors are
5682 equivalent for the purposes of being homogeneous aggregates
5683 if they are the same size. */
5684 if (*modep == mode)
5685 return 1;
5687 break;
5689 case ARRAY_TYPE:
5691 int count;
5692 tree index = TYPE_DOMAIN (type);
5694 /* Can't handle incomplete types nor sizes that are not
5695 fixed. */
5696 if (!COMPLETE_TYPE_P (type)
5697 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5698 return -1;
5700 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5701 if (count == -1
5702 || !index
5703 || !TYPE_MAX_VALUE (index)
5704 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5705 || !TYPE_MIN_VALUE (index)
5706 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5707 || count < 0)
5708 return -1;
5710 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5711 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5713 /* There must be no padding. */
5714 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5715 return -1;
5717 return count;
5720 case RECORD_TYPE:
5722 int count = 0;
5723 int sub_count;
5724 tree field;
5726 /* Can't handle incomplete types nor sizes that are not
5727 fixed. */
5728 if (!COMPLETE_TYPE_P (type)
5729 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5730 return -1;
5732 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5734 if (TREE_CODE (field) != FIELD_DECL)
5735 continue;
5737 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5738 if (sub_count < 0)
5739 return -1;
5740 count += sub_count;
5743 /* There must be no padding. */
5744 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5745 return -1;
5747 return count;
5750 case UNION_TYPE:
5751 case QUAL_UNION_TYPE:
5753 /* These aren't very interesting except in a degenerate case. */
5754 int count = 0;
5755 int sub_count;
5756 tree field;
5758 /* Can't handle incomplete types nor sizes that are not
5759 fixed. */
5760 if (!COMPLETE_TYPE_P (type)
5761 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5762 return -1;
5764 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5766 if (TREE_CODE (field) != FIELD_DECL)
5767 continue;
5769 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5770 if (sub_count < 0)
5771 return -1;
5772 count = count > sub_count ? count : sub_count;
5775 /* There must be no padding. */
5776 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5777 return -1;
5779 return count;
5782 default:
5783 break;
5786 return -1;
5789 /* Return true if PCS_VARIANT should use VFP registers. */
5790 static bool
5791 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5793 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5795 static bool seen_thumb1_vfp = false;
5797 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5799 sorry ("Thumb-1 hard-float VFP ABI");
5800 /* sorry() is not immediately fatal, so only display this once. */
5801 seen_thumb1_vfp = true;
5804 return true;
5807 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5808 return false;
5810 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5811 (TARGET_VFP_DOUBLE || !is_double));
5814 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5815 suitable for passing or returning in VFP registers for the PCS
5816 variant selected. If it is, then *BASE_MODE is updated to contain
5817 a machine mode describing each element of the argument's type and
5818 *COUNT to hold the number of such elements. */
5819 static bool
5820 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5821 machine_mode mode, const_tree type,
5822 machine_mode *base_mode, int *count)
5824 machine_mode new_mode = VOIDmode;
5826 /* If we have the type information, prefer that to working things
5827 out from the mode. */
5828 if (type)
5830 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5832 if (ag_count > 0 && ag_count <= 4)
5833 *count = ag_count;
5834 else
5835 return false;
5837 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5838 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5839 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5841 *count = 1;
5842 new_mode = mode;
5844 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5846 *count = 2;
5847 new_mode = (mode == DCmode ? DFmode : SFmode);
5849 else
5850 return false;
5853 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5854 return false;
5856 *base_mode = new_mode;
5857 return true;
5860 static bool
5861 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5862 machine_mode mode, const_tree type)
5864 int count ATTRIBUTE_UNUSED;
5865 machine_mode ag_mode ATTRIBUTE_UNUSED;
5867 if (!use_vfp_abi (pcs_variant, false))
5868 return false;
5869 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5870 &ag_mode, &count);
5873 static bool
5874 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5875 const_tree type)
5877 if (!use_vfp_abi (pcum->pcs_variant, false))
5878 return false;
5880 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5881 &pcum->aapcs_vfp_rmode,
5882 &pcum->aapcs_vfp_rcount);
5885 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
5886 for the behaviour of this function. */
5888 static bool
5889 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5890 const_tree type ATTRIBUTE_UNUSED)
5892 int rmode_size
5893 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
5894 int shift = rmode_size / GET_MODE_SIZE (SFmode);
5895 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5896 int regno;
5898 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5899 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5901 pcum->aapcs_vfp_reg_alloc = mask << regno;
5902 if (mode == BLKmode
5903 || (mode == TImode && ! TARGET_NEON)
5904 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5906 int i;
5907 int rcount = pcum->aapcs_vfp_rcount;
5908 int rshift = shift;
5909 machine_mode rmode = pcum->aapcs_vfp_rmode;
5910 rtx par;
5911 if (!TARGET_NEON)
5913 /* Avoid using unsupported vector modes. */
5914 if (rmode == V2SImode)
5915 rmode = DImode;
5916 else if (rmode == V4SImode)
5918 rmode = DImode;
5919 rcount *= 2;
5920 rshift /= 2;
5923 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5924 for (i = 0; i < rcount; i++)
5926 rtx tmp = gen_rtx_REG (rmode,
5927 FIRST_VFP_REGNUM + regno + i * rshift);
5928 tmp = gen_rtx_EXPR_LIST
5929 (VOIDmode, tmp,
5930 GEN_INT (i * GET_MODE_SIZE (rmode)));
5931 XVECEXP (par, 0, i) = tmp;
5934 pcum->aapcs_reg = par;
5936 else
5937 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5938 return true;
5940 return false;
5943 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
5944 comment there for the behaviour of this function. */
5946 static rtx
5947 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5948 machine_mode mode,
5949 const_tree type ATTRIBUTE_UNUSED)
5951 if (!use_vfp_abi (pcs_variant, false))
5952 return NULL;
5954 if (mode == BLKmode
5955 || (GET_MODE_CLASS (mode) == MODE_INT
5956 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
5957 && !TARGET_NEON))
5959 int count;
5960 machine_mode ag_mode;
5961 int i;
5962 rtx par;
5963 int shift;
5965 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5966 &ag_mode, &count);
5968 if (!TARGET_NEON)
5970 if (ag_mode == V2SImode)
5971 ag_mode = DImode;
5972 else if (ag_mode == V4SImode)
5974 ag_mode = DImode;
5975 count *= 2;
5978 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5979 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5980 for (i = 0; i < count; i++)
5982 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5983 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5984 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5985 XVECEXP (par, 0, i) = tmp;
5988 return par;
5991 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5994 static void
5995 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5996 machine_mode mode ATTRIBUTE_UNUSED,
5997 const_tree type ATTRIBUTE_UNUSED)
5999 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6000 pcum->aapcs_vfp_reg_alloc = 0;
6001 return;
6004 #define AAPCS_CP(X) \
6006 aapcs_ ## X ## _cum_init, \
6007 aapcs_ ## X ## _is_call_candidate, \
6008 aapcs_ ## X ## _allocate, \
6009 aapcs_ ## X ## _is_return_candidate, \
6010 aapcs_ ## X ## _allocate_return_reg, \
6011 aapcs_ ## X ## _advance \
6014 /* Table of co-processors that can be used to pass arguments in
6015 registers. Idealy no arugment should be a candidate for more than
6016 one co-processor table entry, but the table is processed in order
6017 and stops after the first match. If that entry then fails to put
6018 the argument into a co-processor register, the argument will go on
6019 the stack. */
6020 static struct
6022 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6023 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6025 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6026 BLKmode) is a candidate for this co-processor's registers; this
6027 function should ignore any position-dependent state in
6028 CUMULATIVE_ARGS and only use call-type dependent information. */
6029 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6031 /* Return true if the argument does get a co-processor register; it
6032 should set aapcs_reg to an RTX of the register allocated as is
6033 required for a return from FUNCTION_ARG. */
6034 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6036 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6037 be returned in this co-processor's registers. */
6038 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6040 /* Allocate and return an RTX element to hold the return type of a call. This
6041 routine must not fail and will only be called if is_return_candidate
6042 returned true with the same parameters. */
6043 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6045 /* Finish processing this argument and prepare to start processing
6046 the next one. */
6047 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6048 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6050 AAPCS_CP(vfp)
6053 #undef AAPCS_CP
6055 static int
6056 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6057 const_tree type)
6059 int i;
6061 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6062 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6063 return i;
6065 return -1;
6068 static int
6069 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6071 /* We aren't passed a decl, so we can't check that a call is local.
6072 However, it isn't clear that that would be a win anyway, since it
6073 might limit some tail-calling opportunities. */
6074 enum arm_pcs pcs_variant;
6076 if (fntype)
6078 const_tree fndecl = NULL_TREE;
6080 if (TREE_CODE (fntype) == FUNCTION_DECL)
6082 fndecl = fntype;
6083 fntype = TREE_TYPE (fntype);
6086 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6088 else
6089 pcs_variant = arm_pcs_default;
6091 if (pcs_variant != ARM_PCS_AAPCS)
6093 int i;
6095 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6096 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6097 TYPE_MODE (type),
6098 type))
6099 return i;
6101 return -1;
6104 static rtx
6105 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6106 const_tree fntype)
6108 /* We aren't passed a decl, so we can't check that a call is local.
6109 However, it isn't clear that that would be a win anyway, since it
6110 might limit some tail-calling opportunities. */
6111 enum arm_pcs pcs_variant;
6112 int unsignedp ATTRIBUTE_UNUSED;
6114 if (fntype)
6116 const_tree fndecl = NULL_TREE;
6118 if (TREE_CODE (fntype) == FUNCTION_DECL)
6120 fndecl = fntype;
6121 fntype = TREE_TYPE (fntype);
6124 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6126 else
6127 pcs_variant = arm_pcs_default;
6129 /* Promote integer types. */
6130 if (type && INTEGRAL_TYPE_P (type))
6131 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6133 if (pcs_variant != ARM_PCS_AAPCS)
6135 int i;
6137 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6138 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6139 type))
6140 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6141 mode, type);
6144 /* Promotes small structs returned in a register to full-word size
6145 for big-endian AAPCS. */
6146 if (type && arm_return_in_msb (type))
6148 HOST_WIDE_INT size = int_size_in_bytes (type);
6149 if (size % UNITS_PER_WORD != 0)
6151 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6152 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6156 return gen_rtx_REG (mode, R0_REGNUM);
6159 static rtx
6160 aapcs_libcall_value (machine_mode mode)
6162 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6163 && GET_MODE_SIZE (mode) <= 4)
6164 mode = SImode;
6166 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6169 /* Lay out a function argument using the AAPCS rules. The rule
6170 numbers referred to here are those in the AAPCS. */
6171 static void
6172 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6173 const_tree type, bool named)
6175 int nregs, nregs2;
6176 int ncrn;
6178 /* We only need to do this once per argument. */
6179 if (pcum->aapcs_arg_processed)
6180 return;
6182 pcum->aapcs_arg_processed = true;
6184 /* Special case: if named is false then we are handling an incoming
6185 anonymous argument which is on the stack. */
6186 if (!named)
6187 return;
6189 /* Is this a potential co-processor register candidate? */
6190 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6192 int slot = aapcs_select_call_coproc (pcum, mode, type);
6193 pcum->aapcs_cprc_slot = slot;
6195 /* We don't have to apply any of the rules from part B of the
6196 preparation phase, these are handled elsewhere in the
6197 compiler. */
6199 if (slot >= 0)
6201 /* A Co-processor register candidate goes either in its own
6202 class of registers or on the stack. */
6203 if (!pcum->aapcs_cprc_failed[slot])
6205 /* C1.cp - Try to allocate the argument to co-processor
6206 registers. */
6207 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6208 return;
6210 /* C2.cp - Put the argument on the stack and note that we
6211 can't assign any more candidates in this slot. We also
6212 need to note that we have allocated stack space, so that
6213 we won't later try to split a non-cprc candidate between
6214 core registers and the stack. */
6215 pcum->aapcs_cprc_failed[slot] = true;
6216 pcum->can_split = false;
6219 /* We didn't get a register, so this argument goes on the
6220 stack. */
6221 gcc_assert (pcum->can_split == false);
6222 return;
6226 /* C3 - For double-word aligned arguments, round the NCRN up to the
6227 next even number. */
6228 ncrn = pcum->aapcs_ncrn;
6229 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6230 ncrn++;
6232 nregs = ARM_NUM_REGS2(mode, type);
6234 /* Sigh, this test should really assert that nregs > 0, but a GCC
6235 extension allows empty structs and then gives them empty size; it
6236 then allows such a structure to be passed by value. For some of
6237 the code below we have to pretend that such an argument has
6238 non-zero size so that we 'locate' it correctly either in
6239 registers or on the stack. */
6240 gcc_assert (nregs >= 0);
6242 nregs2 = nregs ? nregs : 1;
6244 /* C4 - Argument fits entirely in core registers. */
6245 if (ncrn + nregs2 <= NUM_ARG_REGS)
6247 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6248 pcum->aapcs_next_ncrn = ncrn + nregs;
6249 return;
6252 /* C5 - Some core registers left and there are no arguments already
6253 on the stack: split this argument between the remaining core
6254 registers and the stack. */
6255 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6257 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6258 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6259 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6260 return;
6263 /* C6 - NCRN is set to 4. */
6264 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6266 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
6267 return;
6270 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6271 for a call to a function whose data type is FNTYPE.
6272 For a library call, FNTYPE is NULL. */
6273 void
6274 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6275 rtx libname,
6276 tree fndecl ATTRIBUTE_UNUSED)
6278 /* Long call handling. */
6279 if (fntype)
6280 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6281 else
6282 pcum->pcs_variant = arm_pcs_default;
6284 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6286 if (arm_libcall_uses_aapcs_base (libname))
6287 pcum->pcs_variant = ARM_PCS_AAPCS;
6289 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6290 pcum->aapcs_reg = NULL_RTX;
6291 pcum->aapcs_partial = 0;
6292 pcum->aapcs_arg_processed = false;
6293 pcum->aapcs_cprc_slot = -1;
6294 pcum->can_split = true;
6296 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6298 int i;
6300 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6302 pcum->aapcs_cprc_failed[i] = false;
6303 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6306 return;
6309 /* Legacy ABIs */
6311 /* On the ARM, the offset starts at 0. */
6312 pcum->nregs = 0;
6313 pcum->iwmmxt_nregs = 0;
6314 pcum->can_split = true;
6316 /* Varargs vectors are treated the same as long long.
6317 named_count avoids having to change the way arm handles 'named' */
6318 pcum->named_count = 0;
6319 pcum->nargs = 0;
6321 if (TARGET_REALLY_IWMMXT && fntype)
6323 tree fn_arg;
6325 for (fn_arg = TYPE_ARG_TYPES (fntype);
6326 fn_arg;
6327 fn_arg = TREE_CHAIN (fn_arg))
6328 pcum->named_count += 1;
6330 if (! pcum->named_count)
6331 pcum->named_count = INT_MAX;
6335 /* Return true if mode/type need doubleword alignment. */
6336 static bool
6337 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6339 if (!type)
6340 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6342 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6343 if (!AGGREGATE_TYPE_P (type))
6344 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6346 /* Array types: Use member alignment of element type. */
6347 if (TREE_CODE (type) == ARRAY_TYPE)
6348 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6350 /* Record/aggregate types: Use greatest member alignment of any member. */
6351 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6352 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6353 return true;
6355 return false;
6359 /* Determine where to put an argument to a function.
6360 Value is zero to push the argument on the stack,
6361 or a hard register in which to store the argument.
6363 MODE is the argument's machine mode.
6364 TYPE is the data type of the argument (as a tree).
6365 This is null for libcalls where that information may
6366 not be available.
6367 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6368 the preceding args and about the function being called.
6369 NAMED is nonzero if this argument is a named parameter
6370 (otherwise it is an extra parameter matching an ellipsis).
6372 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6373 other arguments are passed on the stack. If (NAMED == 0) (which happens
6374 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6375 defined), say it is passed in the stack (function_prologue will
6376 indeed make it pass in the stack if necessary). */
6378 static rtx
6379 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6380 const_tree type, bool named)
6382 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6383 int nregs;
6385 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6386 a call insn (op3 of a call_value insn). */
6387 if (mode == VOIDmode)
6388 return const0_rtx;
6390 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6392 aapcs_layout_arg (pcum, mode, type, named);
6393 return pcum->aapcs_reg;
6396 /* Varargs vectors are treated the same as long long.
6397 named_count avoids having to change the way arm handles 'named' */
6398 if (TARGET_IWMMXT_ABI
6399 && arm_vector_mode_supported_p (mode)
6400 && pcum->named_count > pcum->nargs + 1)
6402 if (pcum->iwmmxt_nregs <= 9)
6403 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6404 else
6406 pcum->can_split = false;
6407 return NULL_RTX;
6411 /* Put doubleword aligned quantities in even register pairs. */
6412 if (pcum->nregs & 1
6413 && ARM_DOUBLEWORD_ALIGN
6414 && arm_needs_doubleword_align (mode, type))
6415 pcum->nregs++;
6417 /* Only allow splitting an arg between regs and memory if all preceding
6418 args were allocated to regs. For args passed by reference we only count
6419 the reference pointer. */
6420 if (pcum->can_split)
6421 nregs = 1;
6422 else
6423 nregs = ARM_NUM_REGS2 (mode, type);
6425 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6426 return NULL_RTX;
6428 return gen_rtx_REG (mode, pcum->nregs);
6431 static unsigned int
6432 arm_function_arg_boundary (machine_mode mode, const_tree type)
6434 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6435 ? DOUBLEWORD_ALIGNMENT
6436 : PARM_BOUNDARY);
6439 static int
6440 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6441 tree type, bool named)
6443 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6444 int nregs = pcum->nregs;
6446 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6448 aapcs_layout_arg (pcum, mode, type, named);
6449 return pcum->aapcs_partial;
6452 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6453 return 0;
6455 if (NUM_ARG_REGS > nregs
6456 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6457 && pcum->can_split)
6458 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6460 return 0;
6463 /* Update the data in PCUM to advance over an argument
6464 of mode MODE and data type TYPE.
6465 (TYPE is null for libcalls where that information may not be available.) */
6467 static void
6468 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6469 const_tree type, bool named)
6471 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6473 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6475 aapcs_layout_arg (pcum, mode, type, named);
6477 if (pcum->aapcs_cprc_slot >= 0)
6479 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6480 type);
6481 pcum->aapcs_cprc_slot = -1;
6484 /* Generic stuff. */
6485 pcum->aapcs_arg_processed = false;
6486 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6487 pcum->aapcs_reg = NULL_RTX;
6488 pcum->aapcs_partial = 0;
6490 else
6492 pcum->nargs += 1;
6493 if (arm_vector_mode_supported_p (mode)
6494 && pcum->named_count > pcum->nargs
6495 && TARGET_IWMMXT_ABI)
6496 pcum->iwmmxt_nregs += 1;
6497 else
6498 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6502 /* Variable sized types are passed by reference. This is a GCC
6503 extension to the ARM ABI. */
6505 static bool
6506 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6507 machine_mode mode ATTRIBUTE_UNUSED,
6508 const_tree type, bool named ATTRIBUTE_UNUSED)
6510 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6513 /* Encode the current state of the #pragma [no_]long_calls. */
6514 typedef enum
6516 OFF, /* No #pragma [no_]long_calls is in effect. */
6517 LONG, /* #pragma long_calls is in effect. */
6518 SHORT /* #pragma no_long_calls is in effect. */
6519 } arm_pragma_enum;
6521 static arm_pragma_enum arm_pragma_long_calls = OFF;
6523 void
6524 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6526 arm_pragma_long_calls = LONG;
6529 void
6530 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6532 arm_pragma_long_calls = SHORT;
6535 void
6536 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6538 arm_pragma_long_calls = OFF;
6541 /* Handle an attribute requiring a FUNCTION_DECL;
6542 arguments as in struct attribute_spec.handler. */
6543 static tree
6544 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6545 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6547 if (TREE_CODE (*node) != FUNCTION_DECL)
6549 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6550 name);
6551 *no_add_attrs = true;
6554 return NULL_TREE;
6557 /* Handle an "interrupt" or "isr" attribute;
6558 arguments as in struct attribute_spec.handler. */
6559 static tree
6560 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6561 bool *no_add_attrs)
6563 if (DECL_P (*node))
6565 if (TREE_CODE (*node) != FUNCTION_DECL)
6567 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6568 name);
6569 *no_add_attrs = true;
6571 /* FIXME: the argument if any is checked for type attributes;
6572 should it be checked for decl ones? */
6574 else
6576 if (TREE_CODE (*node) == FUNCTION_TYPE
6577 || TREE_CODE (*node) == METHOD_TYPE)
6579 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6581 warning (OPT_Wattributes, "%qE attribute ignored",
6582 name);
6583 *no_add_attrs = true;
6586 else if (TREE_CODE (*node) == POINTER_TYPE
6587 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6588 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6589 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6591 *node = build_variant_type_copy (*node);
6592 TREE_TYPE (*node) = build_type_attribute_variant
6593 (TREE_TYPE (*node),
6594 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6595 *no_add_attrs = true;
6597 else
6599 /* Possibly pass this attribute on from the type to a decl. */
6600 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6601 | (int) ATTR_FLAG_FUNCTION_NEXT
6602 | (int) ATTR_FLAG_ARRAY_NEXT))
6604 *no_add_attrs = true;
6605 return tree_cons (name, args, NULL_TREE);
6607 else
6609 warning (OPT_Wattributes, "%qE attribute ignored",
6610 name);
6615 return NULL_TREE;
6618 /* Handle a "pcs" attribute; arguments as in struct
6619 attribute_spec.handler. */
6620 static tree
6621 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6622 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6624 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6626 warning (OPT_Wattributes, "%qE attribute ignored", name);
6627 *no_add_attrs = true;
6629 return NULL_TREE;
6632 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6633 /* Handle the "notshared" attribute. This attribute is another way of
6634 requesting hidden visibility. ARM's compiler supports
6635 "__declspec(notshared)"; we support the same thing via an
6636 attribute. */
6638 static tree
6639 arm_handle_notshared_attribute (tree *node,
6640 tree name ATTRIBUTE_UNUSED,
6641 tree args ATTRIBUTE_UNUSED,
6642 int flags ATTRIBUTE_UNUSED,
6643 bool *no_add_attrs)
6645 tree decl = TYPE_NAME (*node);
6647 if (decl)
6649 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6650 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6651 *no_add_attrs = false;
6653 return NULL_TREE;
6655 #endif
6657 /* Return 0 if the attributes for two types are incompatible, 1 if they
6658 are compatible, and 2 if they are nearly compatible (which causes a
6659 warning to be generated). */
6660 static int
6661 arm_comp_type_attributes (const_tree type1, const_tree type2)
6663 int l1, l2, s1, s2;
6665 /* Check for mismatch of non-default calling convention. */
6666 if (TREE_CODE (type1) != FUNCTION_TYPE)
6667 return 1;
6669 /* Check for mismatched call attributes. */
6670 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6671 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6672 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6673 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6675 /* Only bother to check if an attribute is defined. */
6676 if (l1 | l2 | s1 | s2)
6678 /* If one type has an attribute, the other must have the same attribute. */
6679 if ((l1 != l2) || (s1 != s2))
6680 return 0;
6682 /* Disallow mixed attributes. */
6683 if ((l1 & s2) || (l2 & s1))
6684 return 0;
6687 /* Check for mismatched ISR attribute. */
6688 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6689 if (! l1)
6690 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6691 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6692 if (! l2)
6693 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6694 if (l1 != l2)
6695 return 0;
6697 return 1;
6700 /* Assigns default attributes to newly defined type. This is used to
6701 set short_call/long_call attributes for function types of
6702 functions defined inside corresponding #pragma scopes. */
6703 static void
6704 arm_set_default_type_attributes (tree type)
6706 /* Add __attribute__ ((long_call)) to all functions, when
6707 inside #pragma long_calls or __attribute__ ((short_call)),
6708 when inside #pragma no_long_calls. */
6709 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6711 tree type_attr_list, attr_name;
6712 type_attr_list = TYPE_ATTRIBUTES (type);
6714 if (arm_pragma_long_calls == LONG)
6715 attr_name = get_identifier ("long_call");
6716 else if (arm_pragma_long_calls == SHORT)
6717 attr_name = get_identifier ("short_call");
6718 else
6719 return;
6721 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6722 TYPE_ATTRIBUTES (type) = type_attr_list;
6726 /* Return true if DECL is known to be linked into section SECTION. */
6728 static bool
6729 arm_function_in_section_p (tree decl, section *section)
6731 /* We can only be certain about the prevailing symbol definition. */
6732 if (!decl_binds_to_current_def_p (decl))
6733 return false;
6735 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6736 if (!DECL_SECTION_NAME (decl))
6738 /* Make sure that we will not create a unique section for DECL. */
6739 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6740 return false;
6743 return function_section (decl) == section;
6746 /* Return nonzero if a 32-bit "long_call" should be generated for
6747 a call from the current function to DECL. We generate a long_call
6748 if the function:
6750 a. has an __attribute__((long call))
6751 or b. is within the scope of a #pragma long_calls
6752 or c. the -mlong-calls command line switch has been specified
6754 However we do not generate a long call if the function:
6756 d. has an __attribute__ ((short_call))
6757 or e. is inside the scope of a #pragma no_long_calls
6758 or f. is defined in the same section as the current function. */
6760 bool
6761 arm_is_long_call_p (tree decl)
6763 tree attrs;
6765 if (!decl)
6766 return TARGET_LONG_CALLS;
6768 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6769 if (lookup_attribute ("short_call", attrs))
6770 return false;
6772 /* For "f", be conservative, and only cater for cases in which the
6773 whole of the current function is placed in the same section. */
6774 if (!flag_reorder_blocks_and_partition
6775 && TREE_CODE (decl) == FUNCTION_DECL
6776 && arm_function_in_section_p (decl, current_function_section ()))
6777 return false;
6779 if (lookup_attribute ("long_call", attrs))
6780 return true;
6782 return TARGET_LONG_CALLS;
6785 /* Return nonzero if it is ok to make a tail-call to DECL. */
6786 static bool
6787 arm_function_ok_for_sibcall (tree decl, tree exp)
6789 unsigned long func_type;
6791 if (cfun->machine->sibcall_blocked)
6792 return false;
6794 /* Never tailcall something if we are generating code for Thumb-1. */
6795 if (TARGET_THUMB1)
6796 return false;
6798 /* The PIC register is live on entry to VxWorks PLT entries, so we
6799 must make the call before restoring the PIC register. */
6800 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
6801 return false;
6803 /* If we are interworking and the function is not declared static
6804 then we can't tail-call it unless we know that it exists in this
6805 compilation unit (since it might be a Thumb routine). */
6806 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6807 && !TREE_ASM_WRITTEN (decl))
6808 return false;
6810 func_type = arm_current_func_type ();
6811 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6812 if (IS_INTERRUPT (func_type))
6813 return false;
6815 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6817 /* Check that the return value locations are the same. For
6818 example that we aren't returning a value from the sibling in
6819 a VFP register but then need to transfer it to a core
6820 register. */
6821 rtx a, b;
6822 tree decl_or_type = decl;
6824 /* If it is an indirect function pointer, get the function type. */
6825 if (!decl)
6826 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
6828 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
6829 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6830 cfun->decl, false);
6831 if (!rtx_equal_p (a, b))
6832 return false;
6835 /* Never tailcall if function may be called with a misaligned SP. */
6836 if (IS_STACKALIGN (func_type))
6837 return false;
6839 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6840 references should become a NOP. Don't convert such calls into
6841 sibling calls. */
6842 if (TARGET_AAPCS_BASED
6843 && arm_abi == ARM_ABI_AAPCS
6844 && decl
6845 && DECL_WEAK (decl))
6846 return false;
6848 /* Everything else is ok. */
6849 return true;
6853 /* Addressing mode support functions. */
6855 /* Return nonzero if X is a legitimate immediate operand when compiling
6856 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6858 legitimate_pic_operand_p (rtx x)
6860 if (GET_CODE (x) == SYMBOL_REF
6861 || (GET_CODE (x) == CONST
6862 && GET_CODE (XEXP (x, 0)) == PLUS
6863 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6864 return 0;
6866 return 1;
6869 /* Record that the current function needs a PIC register. Initialize
6870 cfun->machine->pic_reg if we have not already done so. */
6872 static void
6873 require_pic_register (void)
6875 /* A lot of the logic here is made obscure by the fact that this
6876 routine gets called as part of the rtx cost estimation process.
6877 We don't want those calls to affect any assumptions about the real
6878 function; and further, we can't call entry_of_function() until we
6879 start the real expansion process. */
6880 if (!crtl->uses_pic_offset_table)
6882 gcc_assert (can_create_pseudo_p ());
6883 if (arm_pic_register != INVALID_REGNUM
6884 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6886 if (!cfun->machine->pic_reg)
6887 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6889 /* Play games to avoid marking the function as needing pic
6890 if we are being called as part of the cost-estimation
6891 process. */
6892 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6893 crtl->uses_pic_offset_table = 1;
6895 else
6897 rtx_insn *seq, *insn;
6899 if (!cfun->machine->pic_reg)
6900 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6902 /* Play games to avoid marking the function as needing pic
6903 if we are being called as part of the cost-estimation
6904 process. */
6905 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6907 crtl->uses_pic_offset_table = 1;
6908 start_sequence ();
6910 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6911 && arm_pic_register > LAST_LO_REGNUM)
6912 emit_move_insn (cfun->machine->pic_reg,
6913 gen_rtx_REG (Pmode, arm_pic_register));
6914 else
6915 arm_load_pic_register (0UL);
6917 seq = get_insns ();
6918 end_sequence ();
6920 for (insn = seq; insn; insn = NEXT_INSN (insn))
6921 if (INSN_P (insn))
6922 INSN_LOCATION (insn) = prologue_location;
6924 /* We can be called during expansion of PHI nodes, where
6925 we can't yet emit instructions directly in the final
6926 insn stream. Queue the insns on the entry edge, they will
6927 be committed after everything else is expanded. */
6928 insert_insn_on_edge (seq,
6929 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6936 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6938 if (GET_CODE (orig) == SYMBOL_REF
6939 || GET_CODE (orig) == LABEL_REF)
6941 rtx insn;
6943 if (reg == 0)
6945 gcc_assert (can_create_pseudo_p ());
6946 reg = gen_reg_rtx (Pmode);
6949 /* VxWorks does not impose a fixed gap between segments; the run-time
6950 gap can be different from the object-file gap. We therefore can't
6951 use GOTOFF unless we are absolutely sure that the symbol is in the
6952 same segment as the GOT. Unfortunately, the flexibility of linker
6953 scripts means that we can't be sure of that in general, so assume
6954 that GOTOFF is never valid on VxWorks. */
6955 if ((GET_CODE (orig) == LABEL_REF
6956 || (GET_CODE (orig) == SYMBOL_REF &&
6957 SYMBOL_REF_LOCAL_P (orig)))
6958 && NEED_GOT_RELOC
6959 && arm_pic_data_is_text_relative)
6960 insn = arm_pic_static_addr (orig, reg);
6961 else
6963 rtx pat;
6964 rtx mem;
6966 /* If this function doesn't have a pic register, create one now. */
6967 require_pic_register ();
6969 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6971 /* Make the MEM as close to a constant as possible. */
6972 mem = SET_SRC (pat);
6973 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6974 MEM_READONLY_P (mem) = 1;
6975 MEM_NOTRAP_P (mem) = 1;
6977 insn = emit_insn (pat);
6980 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6981 by loop. */
6982 set_unique_reg_note (insn, REG_EQUAL, orig);
6984 return reg;
6986 else if (GET_CODE (orig) == CONST)
6988 rtx base, offset;
6990 if (GET_CODE (XEXP (orig, 0)) == PLUS
6991 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6992 return orig;
6994 /* Handle the case where we have: const (UNSPEC_TLS). */
6995 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6996 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6997 return orig;
6999 /* Handle the case where we have:
7000 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7001 CONST_INT. */
7002 if (GET_CODE (XEXP (orig, 0)) == PLUS
7003 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7004 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7006 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7007 return orig;
7010 if (reg == 0)
7012 gcc_assert (can_create_pseudo_p ());
7013 reg = gen_reg_rtx (Pmode);
7016 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7018 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7019 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7020 base == reg ? 0 : reg);
7022 if (CONST_INT_P (offset))
7024 /* The base register doesn't really matter, we only want to
7025 test the index for the appropriate mode. */
7026 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7028 gcc_assert (can_create_pseudo_p ());
7029 offset = force_reg (Pmode, offset);
7032 if (CONST_INT_P (offset))
7033 return plus_constant (Pmode, base, INTVAL (offset));
7036 if (GET_MODE_SIZE (mode) > 4
7037 && (GET_MODE_CLASS (mode) == MODE_INT
7038 || TARGET_SOFT_FLOAT))
7040 emit_insn (gen_addsi3 (reg, base, offset));
7041 return reg;
7044 return gen_rtx_PLUS (Pmode, base, offset);
7047 return orig;
7051 /* Find a spare register to use during the prolog of a function. */
7053 static int
7054 thumb_find_work_register (unsigned long pushed_regs_mask)
7056 int reg;
7058 /* Check the argument registers first as these are call-used. The
7059 register allocation order means that sometimes r3 might be used
7060 but earlier argument registers might not, so check them all. */
7061 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7062 if (!df_regs_ever_live_p (reg))
7063 return reg;
7065 /* Before going on to check the call-saved registers we can try a couple
7066 more ways of deducing that r3 is available. The first is when we are
7067 pushing anonymous arguments onto the stack and we have less than 4
7068 registers worth of fixed arguments(*). In this case r3 will be part of
7069 the variable argument list and so we can be sure that it will be
7070 pushed right at the start of the function. Hence it will be available
7071 for the rest of the prologue.
7072 (*): ie crtl->args.pretend_args_size is greater than 0. */
7073 if (cfun->machine->uses_anonymous_args
7074 && crtl->args.pretend_args_size > 0)
7075 return LAST_ARG_REGNUM;
7077 /* The other case is when we have fixed arguments but less than 4 registers
7078 worth. In this case r3 might be used in the body of the function, but
7079 it is not being used to convey an argument into the function. In theory
7080 we could just check crtl->args.size to see how many bytes are
7081 being passed in argument registers, but it seems that it is unreliable.
7082 Sometimes it will have the value 0 when in fact arguments are being
7083 passed. (See testcase execute/20021111-1.c for an example). So we also
7084 check the args_info.nregs field as well. The problem with this field is
7085 that it makes no allowances for arguments that are passed to the
7086 function but which are not used. Hence we could miss an opportunity
7087 when a function has an unused argument in r3. But it is better to be
7088 safe than to be sorry. */
7089 if (! cfun->machine->uses_anonymous_args
7090 && crtl->args.size >= 0
7091 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7092 && (TARGET_AAPCS_BASED
7093 ? crtl->args.info.aapcs_ncrn < 4
7094 : crtl->args.info.nregs < 4))
7095 return LAST_ARG_REGNUM;
7097 /* Otherwise look for a call-saved register that is going to be pushed. */
7098 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7099 if (pushed_regs_mask & (1 << reg))
7100 return reg;
7102 if (TARGET_THUMB2)
7104 /* Thumb-2 can use high regs. */
7105 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7106 if (pushed_regs_mask & (1 << reg))
7107 return reg;
7109 /* Something went wrong - thumb_compute_save_reg_mask()
7110 should have arranged for a suitable register to be pushed. */
7111 gcc_unreachable ();
7114 static GTY(()) int pic_labelno;
7116 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7117 low register. */
7119 void
7120 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7122 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7124 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7125 return;
7127 gcc_assert (flag_pic);
7129 pic_reg = cfun->machine->pic_reg;
7130 if (TARGET_VXWORKS_RTP)
7132 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7133 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7134 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7136 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7138 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7139 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7141 else
7143 /* We use an UNSPEC rather than a LABEL_REF because this label
7144 never appears in the code stream. */
7146 labelno = GEN_INT (pic_labelno++);
7147 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7148 l1 = gen_rtx_CONST (VOIDmode, l1);
7150 /* On the ARM the PC register contains 'dot + 8' at the time of the
7151 addition, on the Thumb it is 'dot + 4'. */
7152 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7153 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7154 UNSPEC_GOTSYM_OFF);
7155 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7157 if (TARGET_32BIT)
7159 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7161 else /* TARGET_THUMB1 */
7163 if (arm_pic_register != INVALID_REGNUM
7164 && REGNO (pic_reg) > LAST_LO_REGNUM)
7166 /* We will have pushed the pic register, so we should always be
7167 able to find a work register. */
7168 pic_tmp = gen_rtx_REG (SImode,
7169 thumb_find_work_register (saved_regs));
7170 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7171 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7172 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7174 else if (arm_pic_register != INVALID_REGNUM
7175 && arm_pic_register > LAST_LO_REGNUM
7176 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7178 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7179 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7180 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7182 else
7183 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7187 /* Need to emit this whether or not we obey regdecls,
7188 since setjmp/longjmp can cause life info to screw up. */
7189 emit_use (pic_reg);
7192 /* Generate code to load the address of a static var when flag_pic is set. */
7193 static rtx
7194 arm_pic_static_addr (rtx orig, rtx reg)
7196 rtx l1, labelno, offset_rtx, insn;
7198 gcc_assert (flag_pic);
7200 /* We use an UNSPEC rather than a LABEL_REF because this label
7201 never appears in the code stream. */
7202 labelno = GEN_INT (pic_labelno++);
7203 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7204 l1 = gen_rtx_CONST (VOIDmode, l1);
7206 /* On the ARM the PC register contains 'dot + 8' at the time of the
7207 addition, on the Thumb it is 'dot + 4'. */
7208 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7209 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7210 UNSPEC_SYMBOL_OFFSET);
7211 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7213 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7214 return insn;
7217 /* Return nonzero if X is valid as an ARM state addressing register. */
7218 static int
7219 arm_address_register_rtx_p (rtx x, int strict_p)
7221 int regno;
7223 if (!REG_P (x))
7224 return 0;
7226 regno = REGNO (x);
7228 if (strict_p)
7229 return ARM_REGNO_OK_FOR_BASE_P (regno);
7231 return (regno <= LAST_ARM_REGNUM
7232 || regno >= FIRST_PSEUDO_REGISTER
7233 || regno == FRAME_POINTER_REGNUM
7234 || regno == ARG_POINTER_REGNUM);
7237 /* Return TRUE if this rtx is the difference of a symbol and a label,
7238 and will reduce to a PC-relative relocation in the object file.
7239 Expressions like this can be left alone when generating PIC, rather
7240 than forced through the GOT. */
7241 static int
7242 pcrel_constant_p (rtx x)
7244 if (GET_CODE (x) == MINUS)
7245 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7247 return FALSE;
7250 /* Return true if X will surely end up in an index register after next
7251 splitting pass. */
7252 static bool
7253 will_be_in_index_register (const_rtx x)
7255 /* arm.md: calculate_pic_address will split this into a register. */
7256 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7259 /* Return nonzero if X is a valid ARM state address operand. */
7261 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7262 int strict_p)
7264 bool use_ldrd;
7265 enum rtx_code code = GET_CODE (x);
7267 if (arm_address_register_rtx_p (x, strict_p))
7268 return 1;
7270 use_ldrd = (TARGET_LDRD
7271 && (mode == DImode || mode == DFmode));
7273 if (code == POST_INC || code == PRE_DEC
7274 || ((code == PRE_INC || code == POST_DEC)
7275 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7276 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7278 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7279 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7280 && GET_CODE (XEXP (x, 1)) == PLUS
7281 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7283 rtx addend = XEXP (XEXP (x, 1), 1);
7285 /* Don't allow ldrd post increment by register because it's hard
7286 to fixup invalid register choices. */
7287 if (use_ldrd
7288 && GET_CODE (x) == POST_MODIFY
7289 && REG_P (addend))
7290 return 0;
7292 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7293 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7296 /* After reload constants split into minipools will have addresses
7297 from a LABEL_REF. */
7298 else if (reload_completed
7299 && (code == LABEL_REF
7300 || (code == CONST
7301 && GET_CODE (XEXP (x, 0)) == PLUS
7302 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7303 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7304 return 1;
7306 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7307 return 0;
7309 else if (code == PLUS)
7311 rtx xop0 = XEXP (x, 0);
7312 rtx xop1 = XEXP (x, 1);
7314 return ((arm_address_register_rtx_p (xop0, strict_p)
7315 && ((CONST_INT_P (xop1)
7316 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7317 || (!strict_p && will_be_in_index_register (xop1))))
7318 || (arm_address_register_rtx_p (xop1, strict_p)
7319 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7322 #if 0
7323 /* Reload currently can't handle MINUS, so disable this for now */
7324 else if (GET_CODE (x) == MINUS)
7326 rtx xop0 = XEXP (x, 0);
7327 rtx xop1 = XEXP (x, 1);
7329 return (arm_address_register_rtx_p (xop0, strict_p)
7330 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7332 #endif
7334 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7335 && code == SYMBOL_REF
7336 && CONSTANT_POOL_ADDRESS_P (x)
7337 && ! (flag_pic
7338 && symbol_mentioned_p (get_pool_constant (x))
7339 && ! pcrel_constant_p (get_pool_constant (x))))
7340 return 1;
7342 return 0;
7345 /* Return nonzero if X is a valid Thumb-2 address operand. */
7346 static int
7347 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7349 bool use_ldrd;
7350 enum rtx_code code = GET_CODE (x);
7352 if (arm_address_register_rtx_p (x, strict_p))
7353 return 1;
7355 use_ldrd = (TARGET_LDRD
7356 && (mode == DImode || mode == DFmode));
7358 if (code == POST_INC || code == PRE_DEC
7359 || ((code == PRE_INC || code == POST_DEC)
7360 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7361 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7363 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7364 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7365 && GET_CODE (XEXP (x, 1)) == PLUS
7366 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7368 /* Thumb-2 only has autoincrement by constant. */
7369 rtx addend = XEXP (XEXP (x, 1), 1);
7370 HOST_WIDE_INT offset;
7372 if (!CONST_INT_P (addend))
7373 return 0;
7375 offset = INTVAL(addend);
7376 if (GET_MODE_SIZE (mode) <= 4)
7377 return (offset > -256 && offset < 256);
7379 return (use_ldrd && offset > -1024 && offset < 1024
7380 && (offset & 3) == 0);
7383 /* After reload constants split into minipools will have addresses
7384 from a LABEL_REF. */
7385 else if (reload_completed
7386 && (code == LABEL_REF
7387 || (code == CONST
7388 && GET_CODE (XEXP (x, 0)) == PLUS
7389 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7390 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7391 return 1;
7393 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7394 return 0;
7396 else if (code == PLUS)
7398 rtx xop0 = XEXP (x, 0);
7399 rtx xop1 = XEXP (x, 1);
7401 return ((arm_address_register_rtx_p (xop0, strict_p)
7402 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7403 || (!strict_p && will_be_in_index_register (xop1))))
7404 || (arm_address_register_rtx_p (xop1, strict_p)
7405 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7408 /* Normally we can assign constant values to target registers without
7409 the help of constant pool. But there are cases we have to use constant
7410 pool like:
7411 1) assign a label to register.
7412 2) sign-extend a 8bit value to 32bit and then assign to register.
7414 Constant pool access in format:
7415 (set (reg r0) (mem (symbol_ref (".LC0"))))
7416 will cause the use of literal pool (later in function arm_reorg).
7417 So here we mark such format as an invalid format, then the compiler
7418 will adjust it into:
7419 (set (reg r0) (symbol_ref (".LC0")))
7420 (set (reg r0) (mem (reg r0))).
7421 No extra register is required, and (mem (reg r0)) won't cause the use
7422 of literal pools. */
7423 else if (arm_disable_literal_pool && code == SYMBOL_REF
7424 && CONSTANT_POOL_ADDRESS_P (x))
7425 return 0;
7427 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7428 && code == SYMBOL_REF
7429 && CONSTANT_POOL_ADDRESS_P (x)
7430 && ! (flag_pic
7431 && symbol_mentioned_p (get_pool_constant (x))
7432 && ! pcrel_constant_p (get_pool_constant (x))))
7433 return 1;
7435 return 0;
7438 /* Return nonzero if INDEX is valid for an address index operand in
7439 ARM state. */
7440 static int
7441 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7442 int strict_p)
7444 HOST_WIDE_INT range;
7445 enum rtx_code code = GET_CODE (index);
7447 /* Standard coprocessor addressing modes. */
7448 if (TARGET_HARD_FLOAT
7449 && (mode == SFmode || mode == DFmode))
7450 return (code == CONST_INT && INTVAL (index) < 1024
7451 && INTVAL (index) > -1024
7452 && (INTVAL (index) & 3) == 0);
7454 /* For quad modes, we restrict the constant offset to be slightly less
7455 than what the instruction format permits. We do this because for
7456 quad mode moves, we will actually decompose them into two separate
7457 double-mode reads or writes. INDEX must therefore be a valid
7458 (double-mode) offset and so should INDEX+8. */
7459 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7460 return (code == CONST_INT
7461 && INTVAL (index) < 1016
7462 && INTVAL (index) > -1024
7463 && (INTVAL (index) & 3) == 0);
7465 /* We have no such constraint on double mode offsets, so we permit the
7466 full range of the instruction format. */
7467 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7468 return (code == CONST_INT
7469 && INTVAL (index) < 1024
7470 && INTVAL (index) > -1024
7471 && (INTVAL (index) & 3) == 0);
7473 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7474 return (code == CONST_INT
7475 && INTVAL (index) < 1024
7476 && INTVAL (index) > -1024
7477 && (INTVAL (index) & 3) == 0);
7479 if (arm_address_register_rtx_p (index, strict_p)
7480 && (GET_MODE_SIZE (mode) <= 4))
7481 return 1;
7483 if (mode == DImode || mode == DFmode)
7485 if (code == CONST_INT)
7487 HOST_WIDE_INT val = INTVAL (index);
7489 if (TARGET_LDRD)
7490 return val > -256 && val < 256;
7491 else
7492 return val > -4096 && val < 4092;
7495 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7498 if (GET_MODE_SIZE (mode) <= 4
7499 && ! (arm_arch4
7500 && (mode == HImode
7501 || mode == HFmode
7502 || (mode == QImode && outer == SIGN_EXTEND))))
7504 if (code == MULT)
7506 rtx xiop0 = XEXP (index, 0);
7507 rtx xiop1 = XEXP (index, 1);
7509 return ((arm_address_register_rtx_p (xiop0, strict_p)
7510 && power_of_two_operand (xiop1, SImode))
7511 || (arm_address_register_rtx_p (xiop1, strict_p)
7512 && power_of_two_operand (xiop0, SImode)));
7514 else if (code == LSHIFTRT || code == ASHIFTRT
7515 || code == ASHIFT || code == ROTATERT)
7517 rtx op = XEXP (index, 1);
7519 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7520 && CONST_INT_P (op)
7521 && INTVAL (op) > 0
7522 && INTVAL (op) <= 31);
7526 /* For ARM v4 we may be doing a sign-extend operation during the
7527 load. */
7528 if (arm_arch4)
7530 if (mode == HImode
7531 || mode == HFmode
7532 || (outer == SIGN_EXTEND && mode == QImode))
7533 range = 256;
7534 else
7535 range = 4096;
7537 else
7538 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7540 return (code == CONST_INT
7541 && INTVAL (index) < range
7542 && INTVAL (index) > -range);
7545 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7546 index operand. i.e. 1, 2, 4 or 8. */
7547 static bool
7548 thumb2_index_mul_operand (rtx op)
7550 HOST_WIDE_INT val;
7552 if (!CONST_INT_P (op))
7553 return false;
7555 val = INTVAL(op);
7556 return (val == 1 || val == 2 || val == 4 || val == 8);
7559 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7560 static int
7561 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7563 enum rtx_code code = GET_CODE (index);
7565 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7566 /* Standard coprocessor addressing modes. */
7567 if (TARGET_HARD_FLOAT
7568 && (mode == SFmode || mode == DFmode))
7569 return (code == CONST_INT && INTVAL (index) < 1024
7570 /* Thumb-2 allows only > -256 index range for it's core register
7571 load/stores. Since we allow SF/DF in core registers, we have
7572 to use the intersection between -256~4096 (core) and -1024~1024
7573 (coprocessor). */
7574 && INTVAL (index) > -256
7575 && (INTVAL (index) & 3) == 0);
7577 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7579 /* For DImode assume values will usually live in core regs
7580 and only allow LDRD addressing modes. */
7581 if (!TARGET_LDRD || mode != DImode)
7582 return (code == CONST_INT
7583 && INTVAL (index) < 1024
7584 && INTVAL (index) > -1024
7585 && (INTVAL (index) & 3) == 0);
7588 /* For quad modes, we restrict the constant offset to be slightly less
7589 than what the instruction format permits. We do this because for
7590 quad mode moves, we will actually decompose them into two separate
7591 double-mode reads or writes. INDEX must therefore be a valid
7592 (double-mode) offset and so should INDEX+8. */
7593 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7594 return (code == CONST_INT
7595 && INTVAL (index) < 1016
7596 && INTVAL (index) > -1024
7597 && (INTVAL (index) & 3) == 0);
7599 /* We have no such constraint on double mode offsets, so we permit the
7600 full range of the instruction format. */
7601 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7602 return (code == CONST_INT
7603 && INTVAL (index) < 1024
7604 && INTVAL (index) > -1024
7605 && (INTVAL (index) & 3) == 0);
7607 if (arm_address_register_rtx_p (index, strict_p)
7608 && (GET_MODE_SIZE (mode) <= 4))
7609 return 1;
7611 if (mode == DImode || mode == DFmode)
7613 if (code == CONST_INT)
7615 HOST_WIDE_INT val = INTVAL (index);
7616 /* ??? Can we assume ldrd for thumb2? */
7617 /* Thumb-2 ldrd only has reg+const addressing modes. */
7618 /* ldrd supports offsets of +-1020.
7619 However the ldr fallback does not. */
7620 return val > -256 && val < 256 && (val & 3) == 0;
7622 else
7623 return 0;
7626 if (code == MULT)
7628 rtx xiop0 = XEXP (index, 0);
7629 rtx xiop1 = XEXP (index, 1);
7631 return ((arm_address_register_rtx_p (xiop0, strict_p)
7632 && thumb2_index_mul_operand (xiop1))
7633 || (arm_address_register_rtx_p (xiop1, strict_p)
7634 && thumb2_index_mul_operand (xiop0)));
7636 else if (code == ASHIFT)
7638 rtx op = XEXP (index, 1);
7640 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7641 && CONST_INT_P (op)
7642 && INTVAL (op) > 0
7643 && INTVAL (op) <= 3);
7646 return (code == CONST_INT
7647 && INTVAL (index) < 4096
7648 && INTVAL (index) > -256);
7651 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7652 static int
7653 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7655 int regno;
7657 if (!REG_P (x))
7658 return 0;
7660 regno = REGNO (x);
7662 if (strict_p)
7663 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7665 return (regno <= LAST_LO_REGNUM
7666 || regno > LAST_VIRTUAL_REGISTER
7667 || regno == FRAME_POINTER_REGNUM
7668 || (GET_MODE_SIZE (mode) >= 4
7669 && (regno == STACK_POINTER_REGNUM
7670 || regno >= FIRST_PSEUDO_REGISTER
7671 || x == hard_frame_pointer_rtx
7672 || x == arg_pointer_rtx)));
7675 /* Return nonzero if x is a legitimate index register. This is the case
7676 for any base register that can access a QImode object. */
7677 inline static int
7678 thumb1_index_register_rtx_p (rtx x, int strict_p)
7680 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7683 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7685 The AP may be eliminated to either the SP or the FP, so we use the
7686 least common denominator, e.g. SImode, and offsets from 0 to 64.
7688 ??? Verify whether the above is the right approach.
7690 ??? Also, the FP may be eliminated to the SP, so perhaps that
7691 needs special handling also.
7693 ??? Look at how the mips16 port solves this problem. It probably uses
7694 better ways to solve some of these problems.
7696 Although it is not incorrect, we don't accept QImode and HImode
7697 addresses based on the frame pointer or arg pointer until the
7698 reload pass starts. This is so that eliminating such addresses
7699 into stack based ones won't produce impossible code. */
7701 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7703 /* ??? Not clear if this is right. Experiment. */
7704 if (GET_MODE_SIZE (mode) < 4
7705 && !(reload_in_progress || reload_completed)
7706 && (reg_mentioned_p (frame_pointer_rtx, x)
7707 || reg_mentioned_p (arg_pointer_rtx, x)
7708 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7709 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7710 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7711 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7712 return 0;
7714 /* Accept any base register. SP only in SImode or larger. */
7715 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7716 return 1;
7718 /* This is PC relative data before arm_reorg runs. */
7719 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7720 && GET_CODE (x) == SYMBOL_REF
7721 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7722 return 1;
7724 /* This is PC relative data after arm_reorg runs. */
7725 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7726 && reload_completed
7727 && (GET_CODE (x) == LABEL_REF
7728 || (GET_CODE (x) == CONST
7729 && GET_CODE (XEXP (x, 0)) == PLUS
7730 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7731 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7732 return 1;
7734 /* Post-inc indexing only supported for SImode and larger. */
7735 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7736 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7737 return 1;
7739 else if (GET_CODE (x) == PLUS)
7741 /* REG+REG address can be any two index registers. */
7742 /* We disallow FRAME+REG addressing since we know that FRAME
7743 will be replaced with STACK, and SP relative addressing only
7744 permits SP+OFFSET. */
7745 if (GET_MODE_SIZE (mode) <= 4
7746 && XEXP (x, 0) != frame_pointer_rtx
7747 && XEXP (x, 1) != frame_pointer_rtx
7748 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7749 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7750 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7751 return 1;
7753 /* REG+const has 5-7 bit offset for non-SP registers. */
7754 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7755 || XEXP (x, 0) == arg_pointer_rtx)
7756 && CONST_INT_P (XEXP (x, 1))
7757 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7758 return 1;
7760 /* REG+const has 10-bit offset for SP, but only SImode and
7761 larger is supported. */
7762 /* ??? Should probably check for DI/DFmode overflow here
7763 just like GO_IF_LEGITIMATE_OFFSET does. */
7764 else if (REG_P (XEXP (x, 0))
7765 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7766 && GET_MODE_SIZE (mode) >= 4
7767 && CONST_INT_P (XEXP (x, 1))
7768 && INTVAL (XEXP (x, 1)) >= 0
7769 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7770 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7771 return 1;
7773 else if (REG_P (XEXP (x, 0))
7774 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7775 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7776 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7777 && REGNO (XEXP (x, 0))
7778 <= LAST_VIRTUAL_POINTER_REGISTER))
7779 && GET_MODE_SIZE (mode) >= 4
7780 && CONST_INT_P (XEXP (x, 1))
7781 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7782 return 1;
7785 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7786 && GET_MODE_SIZE (mode) == 4
7787 && GET_CODE (x) == SYMBOL_REF
7788 && CONSTANT_POOL_ADDRESS_P (x)
7789 && ! (flag_pic
7790 && symbol_mentioned_p (get_pool_constant (x))
7791 && ! pcrel_constant_p (get_pool_constant (x))))
7792 return 1;
7794 return 0;
7797 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7798 instruction of mode MODE. */
7800 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7802 switch (GET_MODE_SIZE (mode))
7804 case 1:
7805 return val >= 0 && val < 32;
7807 case 2:
7808 return val >= 0 && val < 64 && (val & 1) == 0;
7810 default:
7811 return (val >= 0
7812 && (val + GET_MODE_SIZE (mode)) <= 128
7813 && (val & 3) == 0);
7817 bool
7818 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7820 if (TARGET_ARM)
7821 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7822 else if (TARGET_THUMB2)
7823 return thumb2_legitimate_address_p (mode, x, strict_p);
7824 else /* if (TARGET_THUMB1) */
7825 return thumb1_legitimate_address_p (mode, x, strict_p);
7828 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7830 Given an rtx X being reloaded into a reg required to be
7831 in class CLASS, return the class of reg to actually use.
7832 In general this is just CLASS, but for the Thumb core registers and
7833 immediate constants we prefer a LO_REGS class or a subset. */
7835 static reg_class_t
7836 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7838 if (TARGET_32BIT)
7839 return rclass;
7840 else
7842 if (rclass == GENERAL_REGS)
7843 return LO_REGS;
7844 else
7845 return rclass;
7849 /* Build the SYMBOL_REF for __tls_get_addr. */
7851 static GTY(()) rtx tls_get_addr_libfunc;
7853 static rtx
7854 get_tls_get_addr (void)
7856 if (!tls_get_addr_libfunc)
7857 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7858 return tls_get_addr_libfunc;
7862 arm_load_tp (rtx target)
7864 if (!target)
7865 target = gen_reg_rtx (SImode);
7867 if (TARGET_HARD_TP)
7869 /* Can return in any reg. */
7870 emit_insn (gen_load_tp_hard (target));
7872 else
7874 /* Always returned in r0. Immediately copy the result into a pseudo,
7875 otherwise other uses of r0 (e.g. setting up function arguments) may
7876 clobber the value. */
7878 rtx tmp;
7880 emit_insn (gen_load_tp_soft ());
7882 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7883 emit_move_insn (target, tmp);
7885 return target;
7888 static rtx
7889 load_tls_operand (rtx x, rtx reg)
7891 rtx tmp;
7893 if (reg == NULL_RTX)
7894 reg = gen_reg_rtx (SImode);
7896 tmp = gen_rtx_CONST (SImode, x);
7898 emit_move_insn (reg, tmp);
7900 return reg;
7903 static rtx
7904 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7906 rtx insns, label, labelno, sum;
7908 gcc_assert (reloc != TLS_DESCSEQ);
7909 start_sequence ();
7911 labelno = GEN_INT (pic_labelno++);
7912 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7913 label = gen_rtx_CONST (VOIDmode, label);
7915 sum = gen_rtx_UNSPEC (Pmode,
7916 gen_rtvec (4, x, GEN_INT (reloc), label,
7917 GEN_INT (TARGET_ARM ? 8 : 4)),
7918 UNSPEC_TLS);
7919 reg = load_tls_operand (sum, reg);
7921 if (TARGET_ARM)
7922 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7923 else
7924 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7926 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7927 LCT_PURE, /* LCT_CONST? */
7928 Pmode, 1, reg, Pmode);
7930 insns = get_insns ();
7931 end_sequence ();
7933 return insns;
7936 static rtx
7937 arm_tls_descseq_addr (rtx x, rtx reg)
7939 rtx labelno = GEN_INT (pic_labelno++);
7940 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7941 rtx sum = gen_rtx_UNSPEC (Pmode,
7942 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7943 gen_rtx_CONST (VOIDmode, label),
7944 GEN_INT (!TARGET_ARM)),
7945 UNSPEC_TLS);
7946 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7948 emit_insn (gen_tlscall (x, labelno));
7949 if (!reg)
7950 reg = gen_reg_rtx (SImode);
7951 else
7952 gcc_assert (REGNO (reg) != R0_REGNUM);
7954 emit_move_insn (reg, reg0);
7956 return reg;
7960 legitimize_tls_address (rtx x, rtx reg)
7962 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7963 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7965 switch (model)
7967 case TLS_MODEL_GLOBAL_DYNAMIC:
7968 if (TARGET_GNU2_TLS)
7970 reg = arm_tls_descseq_addr (x, reg);
7972 tp = arm_load_tp (NULL_RTX);
7974 dest = gen_rtx_PLUS (Pmode, tp, reg);
7976 else
7978 /* Original scheme */
7979 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7980 dest = gen_reg_rtx (Pmode);
7981 emit_libcall_block (insns, dest, ret, x);
7983 return dest;
7985 case TLS_MODEL_LOCAL_DYNAMIC:
7986 if (TARGET_GNU2_TLS)
7988 reg = arm_tls_descseq_addr (x, reg);
7990 tp = arm_load_tp (NULL_RTX);
7992 dest = gen_rtx_PLUS (Pmode, tp, reg);
7994 else
7996 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7998 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7999 share the LDM result with other LD model accesses. */
8000 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8001 UNSPEC_TLS);
8002 dest = gen_reg_rtx (Pmode);
8003 emit_libcall_block (insns, dest, ret, eqv);
8005 /* Load the addend. */
8006 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8007 GEN_INT (TLS_LDO32)),
8008 UNSPEC_TLS);
8009 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8010 dest = gen_rtx_PLUS (Pmode, dest, addend);
8012 return dest;
8014 case TLS_MODEL_INITIAL_EXEC:
8015 labelno = GEN_INT (pic_labelno++);
8016 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8017 label = gen_rtx_CONST (VOIDmode, label);
8018 sum = gen_rtx_UNSPEC (Pmode,
8019 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8020 GEN_INT (TARGET_ARM ? 8 : 4)),
8021 UNSPEC_TLS);
8022 reg = load_tls_operand (sum, reg);
8024 if (TARGET_ARM)
8025 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8026 else if (TARGET_THUMB2)
8027 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8028 else
8030 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8031 emit_move_insn (reg, gen_const_mem (SImode, reg));
8034 tp = arm_load_tp (NULL_RTX);
8036 return gen_rtx_PLUS (Pmode, tp, reg);
8038 case TLS_MODEL_LOCAL_EXEC:
8039 tp = arm_load_tp (NULL_RTX);
8041 reg = gen_rtx_UNSPEC (Pmode,
8042 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8043 UNSPEC_TLS);
8044 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8046 return gen_rtx_PLUS (Pmode, tp, reg);
8048 default:
8049 abort ();
8053 /* Try machine-dependent ways of modifying an illegitimate address
8054 to be legitimate. If we find one, return the new, valid address. */
8056 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8058 if (arm_tls_referenced_p (x))
8060 rtx addend = NULL;
8062 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8064 addend = XEXP (XEXP (x, 0), 1);
8065 x = XEXP (XEXP (x, 0), 0);
8068 if (GET_CODE (x) != SYMBOL_REF)
8069 return x;
8071 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8073 x = legitimize_tls_address (x, NULL_RTX);
8075 if (addend)
8077 x = gen_rtx_PLUS (SImode, x, addend);
8078 orig_x = x;
8080 else
8081 return x;
8084 if (!TARGET_ARM)
8086 /* TODO: legitimize_address for Thumb2. */
8087 if (TARGET_THUMB2)
8088 return x;
8089 return thumb_legitimize_address (x, orig_x, mode);
8092 if (GET_CODE (x) == PLUS)
8094 rtx xop0 = XEXP (x, 0);
8095 rtx xop1 = XEXP (x, 1);
8097 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8098 xop0 = force_reg (SImode, xop0);
8100 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8101 && !symbol_mentioned_p (xop1))
8102 xop1 = force_reg (SImode, xop1);
8104 if (ARM_BASE_REGISTER_RTX_P (xop0)
8105 && CONST_INT_P (xop1))
8107 HOST_WIDE_INT n, low_n;
8108 rtx base_reg, val;
8109 n = INTVAL (xop1);
8111 /* VFP addressing modes actually allow greater offsets, but for
8112 now we just stick with the lowest common denominator. */
8113 if (mode == DImode || mode == DFmode)
8115 low_n = n & 0x0f;
8116 n &= ~0x0f;
8117 if (low_n > 4)
8119 n += 16;
8120 low_n -= 16;
8123 else
8125 low_n = ((mode) == TImode ? 0
8126 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8127 n -= low_n;
8130 base_reg = gen_reg_rtx (SImode);
8131 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8132 emit_move_insn (base_reg, val);
8133 x = plus_constant (Pmode, base_reg, low_n);
8135 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8136 x = gen_rtx_PLUS (SImode, xop0, xop1);
8139 /* XXX We don't allow MINUS any more -- see comment in
8140 arm_legitimate_address_outer_p (). */
8141 else if (GET_CODE (x) == MINUS)
8143 rtx xop0 = XEXP (x, 0);
8144 rtx xop1 = XEXP (x, 1);
8146 if (CONSTANT_P (xop0))
8147 xop0 = force_reg (SImode, xop0);
8149 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8150 xop1 = force_reg (SImode, xop1);
8152 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8153 x = gen_rtx_MINUS (SImode, xop0, xop1);
8156 /* Make sure to take full advantage of the pre-indexed addressing mode
8157 with absolute addresses which often allows for the base register to
8158 be factorized for multiple adjacent memory references, and it might
8159 even allows for the mini pool to be avoided entirely. */
8160 else if (CONST_INT_P (x) && optimize > 0)
8162 unsigned int bits;
8163 HOST_WIDE_INT mask, base, index;
8164 rtx base_reg;
8166 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8167 use a 8-bit index. So let's use a 12-bit index for SImode only and
8168 hope that arm_gen_constant will enable ldrb to use more bits. */
8169 bits = (mode == SImode) ? 12 : 8;
8170 mask = (1 << bits) - 1;
8171 base = INTVAL (x) & ~mask;
8172 index = INTVAL (x) & mask;
8173 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8175 /* It'll most probably be more efficient to generate the base
8176 with more bits set and use a negative index instead. */
8177 base |= mask;
8178 index -= mask;
8180 base_reg = force_reg (SImode, GEN_INT (base));
8181 x = plus_constant (Pmode, base_reg, index);
8184 if (flag_pic)
8186 /* We need to find and carefully transform any SYMBOL and LABEL
8187 references; so go back to the original address expression. */
8188 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8190 if (new_x != orig_x)
8191 x = new_x;
8194 return x;
8198 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8199 to be legitimate. If we find one, return the new, valid address. */
8201 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8203 if (GET_CODE (x) == PLUS
8204 && CONST_INT_P (XEXP (x, 1))
8205 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8206 || INTVAL (XEXP (x, 1)) < 0))
8208 rtx xop0 = XEXP (x, 0);
8209 rtx xop1 = XEXP (x, 1);
8210 HOST_WIDE_INT offset = INTVAL (xop1);
8212 /* Try and fold the offset into a biasing of the base register and
8213 then offsetting that. Don't do this when optimizing for space
8214 since it can cause too many CSEs. */
8215 if (optimize_size && offset >= 0
8216 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8218 HOST_WIDE_INT delta;
8220 if (offset >= 256)
8221 delta = offset - (256 - GET_MODE_SIZE (mode));
8222 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8223 delta = 31 * GET_MODE_SIZE (mode);
8224 else
8225 delta = offset & (~31 * GET_MODE_SIZE (mode));
8227 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8228 NULL_RTX);
8229 x = plus_constant (Pmode, xop0, delta);
8231 else if (offset < 0 && offset > -256)
8232 /* Small negative offsets are best done with a subtract before the
8233 dereference, forcing these into a register normally takes two
8234 instructions. */
8235 x = force_operand (x, NULL_RTX);
8236 else
8238 /* For the remaining cases, force the constant into a register. */
8239 xop1 = force_reg (SImode, xop1);
8240 x = gen_rtx_PLUS (SImode, xop0, xop1);
8243 else if (GET_CODE (x) == PLUS
8244 && s_register_operand (XEXP (x, 1), SImode)
8245 && !s_register_operand (XEXP (x, 0), SImode))
8247 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8249 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8252 if (flag_pic)
8254 /* We need to find and carefully transform any SYMBOL and LABEL
8255 references; so go back to the original address expression. */
8256 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8258 if (new_x != orig_x)
8259 x = new_x;
8262 return x;
8265 /* Return TRUE if X contains any TLS symbol references. */
8267 bool
8268 arm_tls_referenced_p (rtx x)
8270 if (! TARGET_HAVE_TLS)
8271 return false;
8273 subrtx_iterator::array_type array;
8274 FOR_EACH_SUBRTX (iter, array, x, ALL)
8276 const_rtx x = *iter;
8277 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8278 return true;
8280 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8281 TLS offsets, not real symbol references. */
8282 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8283 iter.skip_subrtxes ();
8285 return false;
8288 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8290 On the ARM, allow any integer (invalid ones are removed later by insn
8291 patterns), nice doubles and symbol_refs which refer to the function's
8292 constant pool XXX.
8294 When generating pic allow anything. */
8296 static bool
8297 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8299 return flag_pic || !label_mentioned_p (x);
8302 static bool
8303 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8305 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
8306 RTX. These RTX must therefore be allowed for Thumb-1 so that when run
8307 for ARMv8-M Baseline or later the result is valid. */
8308 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8309 x = XEXP (x, 0);
8311 return (CONST_INT_P (x)
8312 || CONST_DOUBLE_P (x)
8313 || CONSTANT_ADDRESS_P (x)
8314 || flag_pic);
8317 static bool
8318 arm_legitimate_constant_p (machine_mode mode, rtx x)
8320 return (!arm_cannot_force_const_mem (mode, x)
8321 && (TARGET_32BIT
8322 ? arm_legitimate_constant_p_1 (mode, x)
8323 : thumb_legitimate_constant_p (mode, x)));
8326 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8328 static bool
8329 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8331 rtx base, offset;
8333 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8335 split_const (x, &base, &offset);
8336 if (GET_CODE (base) == SYMBOL_REF
8337 && !offset_within_block_p (base, INTVAL (offset)))
8338 return true;
8340 return arm_tls_referenced_p (x);
8343 #define REG_OR_SUBREG_REG(X) \
8344 (REG_P (X) \
8345 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8347 #define REG_OR_SUBREG_RTX(X) \
8348 (REG_P (X) ? (X) : SUBREG_REG (X))
8350 static inline int
8351 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8353 machine_mode mode = GET_MODE (x);
8354 int total, words;
8356 switch (code)
8358 case ASHIFT:
8359 case ASHIFTRT:
8360 case LSHIFTRT:
8361 case ROTATERT:
8362 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8364 case PLUS:
8365 case MINUS:
8366 case COMPARE:
8367 case NEG:
8368 case NOT:
8369 return COSTS_N_INSNS (1);
8371 case MULT:
8372 if (CONST_INT_P (XEXP (x, 1)))
8374 int cycles = 0;
8375 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8377 while (i)
8379 i >>= 2;
8380 cycles++;
8382 return COSTS_N_INSNS (2) + cycles;
8384 return COSTS_N_INSNS (1) + 16;
8386 case SET:
8387 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8388 the mode. */
8389 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8390 return (COSTS_N_INSNS (words)
8391 + 4 * ((MEM_P (SET_SRC (x)))
8392 + MEM_P (SET_DEST (x))));
8394 case CONST_INT:
8395 if (outer == SET)
8397 if (UINTVAL (x) < 256
8398 /* 16-bit constant. */
8399 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8400 return 0;
8401 if (thumb_shiftable_const (INTVAL (x)))
8402 return COSTS_N_INSNS (2);
8403 return COSTS_N_INSNS (3);
8405 else if ((outer == PLUS || outer == COMPARE)
8406 && INTVAL (x) < 256 && INTVAL (x) > -256)
8407 return 0;
8408 else if ((outer == IOR || outer == XOR || outer == AND)
8409 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8410 return COSTS_N_INSNS (1);
8411 else if (outer == AND)
8413 int i;
8414 /* This duplicates the tests in the andsi3 expander. */
8415 for (i = 9; i <= 31; i++)
8416 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8417 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8418 return COSTS_N_INSNS (2);
8420 else if (outer == ASHIFT || outer == ASHIFTRT
8421 || outer == LSHIFTRT)
8422 return 0;
8423 return COSTS_N_INSNS (2);
8425 case CONST:
8426 case CONST_DOUBLE:
8427 case LABEL_REF:
8428 case SYMBOL_REF:
8429 return COSTS_N_INSNS (3);
8431 case UDIV:
8432 case UMOD:
8433 case DIV:
8434 case MOD:
8435 return 100;
8437 case TRUNCATE:
8438 return 99;
8440 case AND:
8441 case XOR:
8442 case IOR:
8443 /* XXX guess. */
8444 return 8;
8446 case MEM:
8447 /* XXX another guess. */
8448 /* Memory costs quite a lot for the first word, but subsequent words
8449 load at the equivalent of a single insn each. */
8450 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8451 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8452 ? 4 : 0));
8454 case IF_THEN_ELSE:
8455 /* XXX a guess. */
8456 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8457 return 14;
8458 return 2;
8460 case SIGN_EXTEND:
8461 case ZERO_EXTEND:
8462 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8463 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8465 if (mode == SImode)
8466 return total;
8468 if (arm_arch6)
8469 return total + COSTS_N_INSNS (1);
8471 /* Assume a two-shift sequence. Increase the cost slightly so
8472 we prefer actual shifts over an extend operation. */
8473 return total + 1 + COSTS_N_INSNS (2);
8475 default:
8476 return 99;
8480 static inline bool
8481 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8483 machine_mode mode = GET_MODE (x);
8484 enum rtx_code subcode;
8485 rtx operand;
8486 enum rtx_code code = GET_CODE (x);
8487 *total = 0;
8489 switch (code)
8491 case MEM:
8492 /* Memory costs quite a lot for the first word, but subsequent words
8493 load at the equivalent of a single insn each. */
8494 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8495 return true;
8497 case DIV:
8498 case MOD:
8499 case UDIV:
8500 case UMOD:
8501 if (TARGET_HARD_FLOAT && mode == SFmode)
8502 *total = COSTS_N_INSNS (2);
8503 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8504 *total = COSTS_N_INSNS (4);
8505 else
8506 *total = COSTS_N_INSNS (20);
8507 return false;
8509 case ROTATE:
8510 if (REG_P (XEXP (x, 1)))
8511 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8512 else if (!CONST_INT_P (XEXP (x, 1)))
8513 *total = rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8515 /* Fall through */
8516 case ROTATERT:
8517 if (mode != SImode)
8519 *total += COSTS_N_INSNS (4);
8520 return true;
8523 /* Fall through */
8524 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8525 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8526 if (mode == DImode)
8528 *total += COSTS_N_INSNS (3);
8529 return true;
8532 *total += COSTS_N_INSNS (1);
8533 /* Increase the cost of complex shifts because they aren't any faster,
8534 and reduce dual issue opportunities. */
8535 if (arm_tune_cortex_a9
8536 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8537 ++*total;
8539 return true;
8541 case MINUS:
8542 if (mode == DImode)
8544 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8545 if (CONST_INT_P (XEXP (x, 0))
8546 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8548 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8549 return true;
8552 if (CONST_INT_P (XEXP (x, 1))
8553 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8555 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8556 return true;
8559 return false;
8562 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8564 if (TARGET_HARD_FLOAT
8565 && (mode == SFmode
8566 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8568 *total = COSTS_N_INSNS (1);
8569 if (CONST_DOUBLE_P (XEXP (x, 0))
8570 && arm_const_double_rtx (XEXP (x, 0)))
8572 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8573 return true;
8576 if (CONST_DOUBLE_P (XEXP (x, 1))
8577 && arm_const_double_rtx (XEXP (x, 1)))
8579 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8580 return true;
8583 return false;
8585 *total = COSTS_N_INSNS (20);
8586 return false;
8589 *total = COSTS_N_INSNS (1);
8590 if (CONST_INT_P (XEXP (x, 0))
8591 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8593 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8594 return true;
8597 subcode = GET_CODE (XEXP (x, 1));
8598 if (subcode == ASHIFT || subcode == ASHIFTRT
8599 || subcode == LSHIFTRT
8600 || subcode == ROTATE || subcode == ROTATERT)
8602 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8603 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8604 return true;
8607 /* A shift as a part of RSB costs no more than RSB itself. */
8608 if (GET_CODE (XEXP (x, 0)) == MULT
8609 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8611 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, speed);
8612 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8613 return true;
8616 if (subcode == MULT
8617 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8619 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8620 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8621 return true;
8624 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8625 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8627 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8628 0, speed);
8629 if (REG_P (XEXP (XEXP (x, 1), 0))
8630 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8631 *total += COSTS_N_INSNS (1);
8633 return true;
8636 /* Fall through */
8638 case PLUS:
8639 if (code == PLUS && arm_arch6 && mode == SImode
8640 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8641 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8643 *total = COSTS_N_INSNS (1);
8644 *total += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
8645 GET_CODE (XEXP (x, 0)), 0, speed);
8646 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8647 return true;
8650 /* MLA: All arguments must be registers. We filter out
8651 multiplication by a power of two, so that we fall down into
8652 the code below. */
8653 if (GET_CODE (XEXP (x, 0)) == MULT
8654 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8656 /* The cost comes from the cost of the multiply. */
8657 return false;
8660 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8662 if (TARGET_HARD_FLOAT
8663 && (mode == SFmode
8664 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8666 *total = COSTS_N_INSNS (1);
8667 if (CONST_DOUBLE_P (XEXP (x, 1))
8668 && arm_const_double_rtx (XEXP (x, 1)))
8670 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8671 return true;
8674 return false;
8677 *total = COSTS_N_INSNS (20);
8678 return false;
8681 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8682 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8684 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), mode, code,
8685 1, speed);
8686 if (REG_P (XEXP (XEXP (x, 0), 0))
8687 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8688 *total += COSTS_N_INSNS (1);
8689 return true;
8692 /* Fall through */
8694 case AND: case XOR: case IOR:
8696 /* Normally the frame registers will be spilt into reg+const during
8697 reload, so it is a bad idea to combine them with other instructions,
8698 since then they might not be moved outside of loops. As a compromise
8699 we allow integration with ops that have a constant as their second
8700 operand. */
8701 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8702 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8703 && !CONST_INT_P (XEXP (x, 1)))
8704 *total = COSTS_N_INSNS (1);
8706 if (mode == DImode)
8708 *total += COSTS_N_INSNS (2);
8709 if (CONST_INT_P (XEXP (x, 1))
8710 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8712 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8713 return true;
8716 return false;
8719 *total += COSTS_N_INSNS (1);
8720 if (CONST_INT_P (XEXP (x, 1))
8721 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8723 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8724 return true;
8726 subcode = GET_CODE (XEXP (x, 0));
8727 if (subcode == ASHIFT || subcode == ASHIFTRT
8728 || subcode == LSHIFTRT
8729 || subcode == ROTATE || subcode == ROTATERT)
8731 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8732 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8733 return true;
8736 if (subcode == MULT
8737 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8739 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8740 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8741 return true;
8744 if (subcode == UMIN || subcode == UMAX
8745 || subcode == SMIN || subcode == SMAX)
8747 *total = COSTS_N_INSNS (3);
8748 return true;
8751 return false;
8753 case MULT:
8754 /* This should have been handled by the CPU specific routines. */
8755 gcc_unreachable ();
8757 case TRUNCATE:
8758 if (arm_arch3m && mode == SImode
8759 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8760 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8761 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8762 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8763 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8764 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8766 *total = rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, LSHIFTRT,
8767 0, speed);
8768 return true;
8770 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8771 return false;
8773 case NEG:
8774 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8776 if (TARGET_HARD_FLOAT
8777 && (mode == SFmode
8778 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8780 *total = COSTS_N_INSNS (1);
8781 return false;
8783 *total = COSTS_N_INSNS (2);
8784 return false;
8787 /* Fall through */
8788 case NOT:
8789 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8790 if (mode == SImode && code == NOT)
8792 subcode = GET_CODE (XEXP (x, 0));
8793 if (subcode == ASHIFT || subcode == ASHIFTRT
8794 || subcode == LSHIFTRT
8795 || subcode == ROTATE || subcode == ROTATERT
8796 || (subcode == MULT
8797 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8799 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode,
8800 0, speed);
8801 /* Register shifts cost an extra cycle. */
8802 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8803 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8804 mode, subcode,
8805 1, speed);
8806 return true;
8810 return false;
8812 case IF_THEN_ELSE:
8813 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8815 *total = COSTS_N_INSNS (4);
8816 return true;
8819 operand = XEXP (x, 0);
8821 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8822 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8823 && REG_P (XEXP (operand, 0))
8824 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8825 *total += COSTS_N_INSNS (1);
8826 *total += rtx_cost (XEXP (x, 1), VOIDmode, code, 1, speed);
8827 *total += rtx_cost (XEXP (x, 2), VOIDmode, code, 2, speed);
8828 return true;
8830 case NE:
8831 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8833 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8834 0, speed);
8835 return true;
8837 goto scc_insn;
8839 case GE:
8840 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8841 && mode == SImode && XEXP (x, 1) == const0_rtx)
8843 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8844 0, speed);
8845 return true;
8847 goto scc_insn;
8849 case LT:
8850 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8851 && mode == SImode && XEXP (x, 1) == const0_rtx)
8853 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8854 0, speed);
8855 return true;
8857 goto scc_insn;
8859 case EQ:
8860 case GT:
8861 case LE:
8862 case GEU:
8863 case LTU:
8864 case GTU:
8865 case LEU:
8866 case UNORDERED:
8867 case ORDERED:
8868 case UNEQ:
8869 case UNGE:
8870 case UNLT:
8871 case UNGT:
8872 case UNLE:
8873 scc_insn:
8874 /* SCC insns. In the case where the comparison has already been
8875 performed, then they cost 2 instructions. Otherwise they need
8876 an additional comparison before them. */
8877 *total = COSTS_N_INSNS (2);
8878 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8880 return true;
8883 /* Fall through */
8884 case COMPARE:
8885 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8887 *total = 0;
8888 return true;
8891 *total += COSTS_N_INSNS (1);
8892 if (CONST_INT_P (XEXP (x, 1))
8893 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8895 *total += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed);
8896 return true;
8899 subcode = GET_CODE (XEXP (x, 0));
8900 if (subcode == ASHIFT || subcode == ASHIFTRT
8901 || subcode == LSHIFTRT
8902 || subcode == ROTATE || subcode == ROTATERT)
8904 mode = GET_MODE (XEXP (x, 0));
8905 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8906 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8907 return true;
8910 if (subcode == MULT
8911 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8913 mode = GET_MODE (XEXP (x, 0));
8914 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8915 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8916 return true;
8919 return false;
8921 case UMIN:
8922 case UMAX:
8923 case SMIN:
8924 case SMAX:
8925 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8926 if (!CONST_INT_P (XEXP (x, 1))
8927 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8928 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8929 return true;
8931 case ABS:
8932 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8934 if (TARGET_HARD_FLOAT
8935 && (mode == SFmode
8936 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8938 *total = COSTS_N_INSNS (1);
8939 return false;
8941 *total = COSTS_N_INSNS (20);
8942 return false;
8944 *total = COSTS_N_INSNS (1);
8945 if (mode == DImode)
8946 *total += COSTS_N_INSNS (3);
8947 return false;
8949 case SIGN_EXTEND:
8950 case ZERO_EXTEND:
8951 *total = 0;
8952 if (GET_MODE_CLASS (mode) == MODE_INT)
8954 rtx op = XEXP (x, 0);
8955 machine_mode opmode = GET_MODE (op);
8957 if (mode == DImode)
8958 *total += COSTS_N_INSNS (1);
8960 if (opmode != SImode)
8962 if (MEM_P (op))
8964 /* If !arm_arch4, we use one of the extendhisi2_mem
8965 or movhi_bytes patterns for HImode. For a QImode
8966 sign extension, we first zero-extend from memory
8967 and then perform a shift sequence. */
8968 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8969 *total += COSTS_N_INSNS (2);
8971 else if (arm_arch6)
8972 *total += COSTS_N_INSNS (1);
8974 /* We don't have the necessary insn, so we need to perform some
8975 other operation. */
8976 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8977 /* An and with constant 255. */
8978 *total += COSTS_N_INSNS (1);
8979 else
8980 /* A shift sequence. Increase costs slightly to avoid
8981 combining two shifts into an extend operation. */
8982 *total += COSTS_N_INSNS (2) + 1;
8985 return false;
8988 switch (GET_MODE (XEXP (x, 0)))
8990 case V8QImode:
8991 case V4HImode:
8992 case V2SImode:
8993 case V4QImode:
8994 case V2HImode:
8995 *total = COSTS_N_INSNS (1);
8996 return false;
8998 default:
8999 gcc_unreachable ();
9001 gcc_unreachable ();
9003 case ZERO_EXTRACT:
9004 case SIGN_EXTRACT:
9005 mode = GET_MODE (XEXP (x, 0));
9006 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
9007 return true;
9009 case CONST_INT:
9010 if (const_ok_for_arm (INTVAL (x))
9011 || const_ok_for_arm (~INTVAL (x)))
9012 *total = COSTS_N_INSNS (1);
9013 else
9014 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
9015 INTVAL (x), NULL_RTX,
9016 NULL_RTX, 0, 0));
9017 return true;
9019 case CONST:
9020 case LABEL_REF:
9021 case SYMBOL_REF:
9022 *total = COSTS_N_INSNS (3);
9023 return true;
9025 case HIGH:
9026 *total = COSTS_N_INSNS (1);
9027 return true;
9029 case LO_SUM:
9030 *total = COSTS_N_INSNS (1);
9031 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
9032 return true;
9034 case CONST_DOUBLE:
9035 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
9036 && (mode == SFmode || !TARGET_VFP_SINGLE))
9037 *total = COSTS_N_INSNS (1);
9038 else
9039 *total = COSTS_N_INSNS (4);
9040 return true;
9042 case SET:
9043 /* The vec_extract patterns accept memory operands that require an
9044 address reload. Account for the cost of that reload to give the
9045 auto-inc-dec pass an incentive to try to replace them. */
9046 if (TARGET_NEON && MEM_P (SET_DEST (x))
9047 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
9049 mode = GET_MODE (SET_DEST (x));
9050 *total = rtx_cost (SET_DEST (x), mode, code, 0, speed);
9051 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
9052 *total += COSTS_N_INSNS (1);
9053 return true;
9055 /* Likewise for the vec_set patterns. */
9056 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
9057 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
9058 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
9060 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
9061 mode = GET_MODE (SET_DEST (x));
9062 *total = rtx_cost (mem, mode, code, 0, speed);
9063 if (!neon_vector_mem_operand (mem, 2, true))
9064 *total += COSTS_N_INSNS (1);
9065 return true;
9067 return false;
9069 case UNSPEC:
9070 /* We cost this as high as our memory costs to allow this to
9071 be hoisted from loops. */
9072 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
9074 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
9076 return true;
9078 case CONST_VECTOR:
9079 if (TARGET_NEON
9080 && TARGET_HARD_FLOAT
9081 && outer == SET
9082 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9083 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9084 *total = COSTS_N_INSNS (1);
9085 else
9086 *total = COSTS_N_INSNS (4);
9087 return true;
9089 default:
9090 *total = COSTS_N_INSNS (4);
9091 return false;
9095 /* Estimates the size cost of thumb1 instructions.
9096 For now most of the code is copied from thumb1_rtx_costs. We need more
9097 fine grain tuning when we have more related test cases. */
9098 static inline int
9099 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9101 machine_mode mode = GET_MODE (x);
9102 int words, cost;
9104 switch (code)
9106 case ASHIFT:
9107 case ASHIFTRT:
9108 case LSHIFTRT:
9109 case ROTATERT:
9110 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9112 case PLUS:
9113 case MINUS:
9114 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9115 defined by RTL expansion, especially for the expansion of
9116 multiplication. */
9117 if ((GET_CODE (XEXP (x, 0)) == MULT
9118 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9119 || (GET_CODE (XEXP (x, 1)) == MULT
9120 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9121 return COSTS_N_INSNS (2);
9122 /* Fall through. */
9123 case COMPARE:
9124 case NEG:
9125 case NOT:
9126 return COSTS_N_INSNS (1);
9128 case MULT:
9129 if (CONST_INT_P (XEXP (x, 1)))
9131 /* Thumb1 mul instruction can't operate on const. We must Load it
9132 into a register first. */
9133 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9134 /* For the targets which have a very small and high-latency multiply
9135 unit, we prefer to synthesize the mult with up to 5 instructions,
9136 giving a good balance between size and performance. */
9137 if (arm_arch6m && arm_m_profile_small_mul)
9138 return COSTS_N_INSNS (5);
9139 else
9140 return COSTS_N_INSNS (1) + const_size;
9142 return COSTS_N_INSNS (1);
9144 case SET:
9145 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9146 the mode. */
9147 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9148 cost = COSTS_N_INSNS (words);
9149 if (satisfies_constraint_J (SET_SRC (x))
9150 || satisfies_constraint_K (SET_SRC (x))
9151 /* Too big an immediate for a 2-byte mov, using MOVT. */
9152 || (CONST_INT_P (SET_SRC (x))
9153 && UINTVAL (SET_SRC (x)) >= 256
9154 && TARGET_HAVE_MOVT
9155 && satisfies_constraint_j (SET_SRC (x)))
9156 /* thumb1_movdi_insn. */
9157 || ((words > 1) && MEM_P (SET_SRC (x))))
9158 cost += COSTS_N_INSNS (1);
9159 return cost;
9161 case CONST_INT:
9162 if (outer == SET)
9164 if (UINTVAL (x) < 256)
9165 return COSTS_N_INSNS (1);
9166 /* movw is 4byte long. */
9167 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9168 return COSTS_N_INSNS (2);
9169 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9170 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9171 return COSTS_N_INSNS (2);
9172 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9173 if (thumb_shiftable_const (INTVAL (x)))
9174 return COSTS_N_INSNS (2);
9175 return COSTS_N_INSNS (3);
9177 else if ((outer == PLUS || outer == COMPARE)
9178 && INTVAL (x) < 256 && INTVAL (x) > -256)
9179 return 0;
9180 else if ((outer == IOR || outer == XOR || outer == AND)
9181 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9182 return COSTS_N_INSNS (1);
9183 else if (outer == AND)
9185 int i;
9186 /* This duplicates the tests in the andsi3 expander. */
9187 for (i = 9; i <= 31; i++)
9188 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9189 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9190 return COSTS_N_INSNS (2);
9192 else if (outer == ASHIFT || outer == ASHIFTRT
9193 || outer == LSHIFTRT)
9194 return 0;
9195 return COSTS_N_INSNS (2);
9197 case CONST:
9198 case CONST_DOUBLE:
9199 case LABEL_REF:
9200 case SYMBOL_REF:
9201 return COSTS_N_INSNS (3);
9203 case UDIV:
9204 case UMOD:
9205 case DIV:
9206 case MOD:
9207 return 100;
9209 case TRUNCATE:
9210 return 99;
9212 case AND:
9213 case XOR:
9214 case IOR:
9215 return COSTS_N_INSNS (1);
9217 case MEM:
9218 return (COSTS_N_INSNS (1)
9219 + COSTS_N_INSNS (1)
9220 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9221 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9222 ? COSTS_N_INSNS (1) : 0));
9224 case IF_THEN_ELSE:
9225 /* XXX a guess. */
9226 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9227 return 14;
9228 return 2;
9230 case ZERO_EXTEND:
9231 /* XXX still guessing. */
9232 switch (GET_MODE (XEXP (x, 0)))
9234 case QImode:
9235 return (1 + (mode == DImode ? 4 : 0)
9236 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9238 case HImode:
9239 return (4 + (mode == DImode ? 4 : 0)
9240 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9242 case SImode:
9243 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9245 default:
9246 return 99;
9249 default:
9250 return 99;
9254 /* RTX costs when optimizing for size. */
9255 static bool
9256 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9257 int *total)
9259 machine_mode mode = GET_MODE (x);
9260 if (TARGET_THUMB1)
9262 *total = thumb1_size_rtx_costs (x, code, outer_code);
9263 return true;
9266 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9267 switch (code)
9269 case MEM:
9270 /* A memory access costs 1 insn if the mode is small, or the address is
9271 a single register, otherwise it costs one insn per word. */
9272 if (REG_P (XEXP (x, 0)))
9273 *total = COSTS_N_INSNS (1);
9274 else if (flag_pic
9275 && GET_CODE (XEXP (x, 0)) == PLUS
9276 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9277 /* This will be split into two instructions.
9278 See arm.md:calculate_pic_address. */
9279 *total = COSTS_N_INSNS (2);
9280 else
9281 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9282 return true;
9284 case DIV:
9285 case MOD:
9286 case UDIV:
9287 case UMOD:
9288 /* Needs a libcall, so it costs about this. */
9289 *total = COSTS_N_INSNS (2);
9290 return false;
9292 case ROTATE:
9293 if (mode == SImode && REG_P (XEXP (x, 1)))
9295 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
9296 0, false);
9297 return true;
9299 /* Fall through */
9300 case ROTATERT:
9301 case ASHIFT:
9302 case LSHIFTRT:
9303 case ASHIFTRT:
9304 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9306 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), mode, code,
9307 0, false);
9308 return true;
9310 else if (mode == SImode)
9312 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
9313 0, false);
9314 /* Slightly disparage register shifts, but not by much. */
9315 if (!CONST_INT_P (XEXP (x, 1)))
9316 *total += 1 + rtx_cost (XEXP (x, 1), mode, code, 1, false);
9317 return true;
9320 /* Needs a libcall. */
9321 *total = COSTS_N_INSNS (2);
9322 return false;
9324 case MINUS:
9325 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9326 && (mode == SFmode || !TARGET_VFP_SINGLE))
9328 *total = COSTS_N_INSNS (1);
9329 return false;
9332 if (mode == SImode)
9334 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9335 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9337 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9338 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9339 || subcode1 == ROTATE || subcode1 == ROTATERT
9340 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9341 || subcode1 == ASHIFTRT)
9343 /* It's just the cost of the two operands. */
9344 *total = 0;
9345 return false;
9348 *total = COSTS_N_INSNS (1);
9349 return false;
9352 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9353 return false;
9355 case PLUS:
9356 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9357 && (mode == SFmode || !TARGET_VFP_SINGLE))
9359 *total = COSTS_N_INSNS (1);
9360 return false;
9363 /* A shift as a part of ADD costs nothing. */
9364 if (GET_CODE (XEXP (x, 0)) == MULT
9365 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9367 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9368 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, false);
9369 *total += rtx_cost (XEXP (x, 1), mode, code, 1, false);
9370 return true;
9373 /* Fall through */
9374 case AND: case XOR: case IOR:
9375 if (mode == SImode)
9377 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9379 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9380 || subcode == LSHIFTRT || subcode == ASHIFTRT
9381 || (code == AND && subcode == NOT))
9383 /* It's just the cost of the two operands. */
9384 *total = 0;
9385 return false;
9389 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9390 return false;
9392 case MULT:
9393 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9394 return false;
9396 case NEG:
9397 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9398 && (mode == SFmode || !TARGET_VFP_SINGLE))
9400 *total = COSTS_N_INSNS (1);
9401 return false;
9404 /* Fall through */
9405 case NOT:
9406 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9408 return false;
9410 case IF_THEN_ELSE:
9411 *total = 0;
9412 return false;
9414 case COMPARE:
9415 if (cc_register (XEXP (x, 0), VOIDmode))
9416 * total = 0;
9417 else
9418 *total = COSTS_N_INSNS (1);
9419 return false;
9421 case ABS:
9422 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9423 && (mode == SFmode || !TARGET_VFP_SINGLE))
9424 *total = COSTS_N_INSNS (1);
9425 else
9426 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9427 return false;
9429 case SIGN_EXTEND:
9430 case ZERO_EXTEND:
9431 return arm_rtx_costs_1 (x, outer_code, total, 0);
9433 case CONST_INT:
9434 if (const_ok_for_arm (INTVAL (x)))
9435 /* A multiplication by a constant requires another instruction
9436 to load the constant to a register. */
9437 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9438 ? 1 : 0);
9439 else if (const_ok_for_arm (~INTVAL (x)))
9440 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9441 else if (const_ok_for_arm (-INTVAL (x)))
9443 if (outer_code == COMPARE || outer_code == PLUS
9444 || outer_code == MINUS)
9445 *total = 0;
9446 else
9447 *total = COSTS_N_INSNS (1);
9449 else
9450 *total = COSTS_N_INSNS (2);
9451 return true;
9453 case CONST:
9454 case LABEL_REF:
9455 case SYMBOL_REF:
9456 *total = COSTS_N_INSNS (2);
9457 return true;
9459 case CONST_DOUBLE:
9460 *total = COSTS_N_INSNS (4);
9461 return true;
9463 case CONST_VECTOR:
9464 if (TARGET_NEON
9465 && TARGET_HARD_FLOAT
9466 && outer_code == SET
9467 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9468 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9469 *total = COSTS_N_INSNS (1);
9470 else
9471 *total = COSTS_N_INSNS (4);
9472 return true;
9474 case HIGH:
9475 case LO_SUM:
9476 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9477 cost of these slightly. */
9478 *total = COSTS_N_INSNS (1) + 1;
9479 return true;
9481 case SET:
9482 return false;
9484 default:
9485 if (mode != VOIDmode)
9486 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9487 else
9488 *total = COSTS_N_INSNS (4); /* How knows? */
9489 return false;
9493 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9494 operand, then return the operand that is being shifted. If the shift
9495 is not by a constant, then set SHIFT_REG to point to the operand.
9496 Return NULL if OP is not a shifter operand. */
9497 static rtx
9498 shifter_op_p (rtx op, rtx *shift_reg)
9500 enum rtx_code code = GET_CODE (op);
9502 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9503 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9504 return XEXP (op, 0);
9505 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9506 return XEXP (op, 0);
9507 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9508 || code == ASHIFTRT)
9510 if (!CONST_INT_P (XEXP (op, 1)))
9511 *shift_reg = XEXP (op, 1);
9512 return XEXP (op, 0);
9515 return NULL;
9518 static bool
9519 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9521 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9522 rtx_code code = GET_CODE (x);
9523 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9525 switch (XINT (x, 1))
9527 case UNSPEC_UNALIGNED_LOAD:
9528 /* We can only do unaligned loads into the integer unit, and we can't
9529 use LDM or LDRD. */
9530 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9531 if (speed_p)
9532 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9533 + extra_cost->ldst.load_unaligned);
9535 #ifdef NOT_YET
9536 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9537 ADDR_SPACE_GENERIC, speed_p);
9538 #endif
9539 return true;
9541 case UNSPEC_UNALIGNED_STORE:
9542 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9543 if (speed_p)
9544 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9545 + extra_cost->ldst.store_unaligned);
9547 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9548 #ifdef NOT_YET
9549 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9550 ADDR_SPACE_GENERIC, speed_p);
9551 #endif
9552 return true;
9554 case UNSPEC_VRINTZ:
9555 case UNSPEC_VRINTP:
9556 case UNSPEC_VRINTM:
9557 case UNSPEC_VRINTR:
9558 case UNSPEC_VRINTX:
9559 case UNSPEC_VRINTA:
9560 if (speed_p)
9561 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9563 return true;
9564 default:
9565 *cost = COSTS_N_INSNS (2);
9566 break;
9568 return true;
9571 /* Cost of a libcall. We assume one insn per argument, an amount for the
9572 call (one insn for -Os) and then one for processing the result. */
9573 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
9575 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9576 do \
9578 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9579 if (shift_op != NULL \
9580 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9582 if (shift_reg) \
9584 if (speed_p) \
9585 *cost += extra_cost->alu.arith_shift_reg; \
9586 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9587 ASHIFT, 1, speed_p); \
9589 else if (speed_p) \
9590 *cost += extra_cost->alu.arith_shift; \
9592 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9593 ASHIFT, 0, speed_p) \
9594 + rtx_cost (XEXP (x, 1 - IDX), \
9595 GET_MODE (shift_op), \
9596 OP, 1, speed_p)); \
9597 return true; \
9600 while (0);
9602 /* RTX costs. Make an estimate of the cost of executing the operation
9603 X, which is contained with an operation with code OUTER_CODE.
9604 SPEED_P indicates whether the cost desired is the performance cost,
9605 or the size cost. The estimate is stored in COST and the return
9606 value is TRUE if the cost calculation is final, or FALSE if the
9607 caller should recurse through the operands of X to add additional
9608 costs.
9610 We currently make no attempt to model the size savings of Thumb-2
9611 16-bit instructions. At the normal points in compilation where
9612 this code is called we have no measure of whether the condition
9613 flags are live or not, and thus no realistic way to determine what
9614 the size will eventually be. */
9615 static bool
9616 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9617 const struct cpu_cost_table *extra_cost,
9618 int *cost, bool speed_p)
9620 machine_mode mode = GET_MODE (x);
9622 *cost = COSTS_N_INSNS (1);
9624 if (TARGET_THUMB1)
9626 if (speed_p)
9627 *cost = thumb1_rtx_costs (x, code, outer_code);
9628 else
9629 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9630 return true;
9633 switch (code)
9635 case SET:
9636 *cost = 0;
9637 /* SET RTXs don't have a mode so we get it from the destination. */
9638 mode = GET_MODE (SET_DEST (x));
9640 if (REG_P (SET_SRC (x))
9641 && REG_P (SET_DEST (x)))
9643 /* Assume that most copies can be done with a single insn,
9644 unless we don't have HW FP, in which case everything
9645 larger than word mode will require two insns. */
9646 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9647 && GET_MODE_SIZE (mode) > 4)
9648 || mode == DImode)
9649 ? 2 : 1);
9650 /* Conditional register moves can be encoded
9651 in 16 bits in Thumb mode. */
9652 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9653 *cost >>= 1;
9655 return true;
9658 if (CONST_INT_P (SET_SRC (x)))
9660 /* Handle CONST_INT here, since the value doesn't have a mode
9661 and we would otherwise be unable to work out the true cost. */
9662 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9663 0, speed_p);
9664 outer_code = SET;
9665 /* Slightly lower the cost of setting a core reg to a constant.
9666 This helps break up chains and allows for better scheduling. */
9667 if (REG_P (SET_DEST (x))
9668 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9669 *cost -= 1;
9670 x = SET_SRC (x);
9671 /* Immediate moves with an immediate in the range [0, 255] can be
9672 encoded in 16 bits in Thumb mode. */
9673 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9674 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9675 *cost >>= 1;
9676 goto const_int_cost;
9679 return false;
9681 case MEM:
9682 /* A memory access costs 1 insn if the mode is small, or the address is
9683 a single register, otherwise it costs one insn per word. */
9684 if (REG_P (XEXP (x, 0)))
9685 *cost = COSTS_N_INSNS (1);
9686 else if (flag_pic
9687 && GET_CODE (XEXP (x, 0)) == PLUS
9688 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9689 /* This will be split into two instructions.
9690 See arm.md:calculate_pic_address. */
9691 *cost = COSTS_N_INSNS (2);
9692 else
9693 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9695 /* For speed optimizations, add the costs of the address and
9696 accessing memory. */
9697 if (speed_p)
9698 #ifdef NOT_YET
9699 *cost += (extra_cost->ldst.load
9700 + arm_address_cost (XEXP (x, 0), mode,
9701 ADDR_SPACE_GENERIC, speed_p));
9702 #else
9703 *cost += extra_cost->ldst.load;
9704 #endif
9705 return true;
9707 case PARALLEL:
9709 /* Calculations of LDM costs are complex. We assume an initial cost
9710 (ldm_1st) which will load the number of registers mentioned in
9711 ldm_regs_per_insn_1st registers; then each additional
9712 ldm_regs_per_insn_subsequent registers cost one more insn. The
9713 formula for N regs is thus:
9715 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9716 + ldm_regs_per_insn_subsequent - 1)
9717 / ldm_regs_per_insn_subsequent).
9719 Additional costs may also be added for addressing. A similar
9720 formula is used for STM. */
9722 bool is_ldm = load_multiple_operation (x, SImode);
9723 bool is_stm = store_multiple_operation (x, SImode);
9725 if (is_ldm || is_stm)
9727 if (speed_p)
9729 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9730 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9731 ? extra_cost->ldst.ldm_regs_per_insn_1st
9732 : extra_cost->ldst.stm_regs_per_insn_1st;
9733 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9734 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9735 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9737 *cost += regs_per_insn_1st
9738 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9739 + regs_per_insn_sub - 1)
9740 / regs_per_insn_sub);
9741 return true;
9745 return false;
9747 case DIV:
9748 case UDIV:
9749 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9750 && (mode == SFmode || !TARGET_VFP_SINGLE))
9751 *cost += COSTS_N_INSNS (speed_p
9752 ? extra_cost->fp[mode != SFmode].div : 0);
9753 else if (mode == SImode && TARGET_IDIV)
9754 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9755 else
9756 *cost = LIBCALL_COST (2);
9757 return false; /* All arguments must be in registers. */
9759 case MOD:
9760 /* MOD by a power of 2 can be expanded as:
9761 rsbs r1, r0, #0
9762 and r0, r0, #(n - 1)
9763 and r1, r1, #(n - 1)
9764 rsbpl r0, r1, #0. */
9765 if (CONST_INT_P (XEXP (x, 1))
9766 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9767 && mode == SImode)
9769 *cost += COSTS_N_INSNS (3);
9771 if (speed_p)
9772 *cost += 2 * extra_cost->alu.logical
9773 + extra_cost->alu.arith;
9774 return true;
9777 /* Fall-through. */
9778 case UMOD:
9779 *cost = LIBCALL_COST (2);
9780 return false; /* All arguments must be in registers. */
9782 case ROTATE:
9783 if (mode == SImode && REG_P (XEXP (x, 1)))
9785 *cost += (COSTS_N_INSNS (1)
9786 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9787 if (speed_p)
9788 *cost += extra_cost->alu.shift_reg;
9789 return true;
9791 /* Fall through */
9792 case ROTATERT:
9793 case ASHIFT:
9794 case LSHIFTRT:
9795 case ASHIFTRT:
9796 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9798 *cost += (COSTS_N_INSNS (2)
9799 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9800 if (speed_p)
9801 *cost += 2 * extra_cost->alu.shift;
9802 return true;
9804 else if (mode == SImode)
9806 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9807 /* Slightly disparage register shifts at -Os, but not by much. */
9808 if (!CONST_INT_P (XEXP (x, 1)))
9809 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9810 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9811 return true;
9813 else if (GET_MODE_CLASS (mode) == MODE_INT
9814 && GET_MODE_SIZE (mode) < 4)
9816 if (code == ASHIFT)
9818 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9819 /* Slightly disparage register shifts at -Os, but not by
9820 much. */
9821 if (!CONST_INT_P (XEXP (x, 1)))
9822 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9823 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9825 else if (code == LSHIFTRT || code == ASHIFTRT)
9827 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9829 /* Can use SBFX/UBFX. */
9830 if (speed_p)
9831 *cost += extra_cost->alu.bfx;
9832 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9834 else
9836 *cost += COSTS_N_INSNS (1);
9837 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9838 if (speed_p)
9840 if (CONST_INT_P (XEXP (x, 1)))
9841 *cost += 2 * extra_cost->alu.shift;
9842 else
9843 *cost += (extra_cost->alu.shift
9844 + extra_cost->alu.shift_reg);
9846 else
9847 /* Slightly disparage register shifts. */
9848 *cost += !CONST_INT_P (XEXP (x, 1));
9851 else /* Rotates. */
9853 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9854 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9855 if (speed_p)
9857 if (CONST_INT_P (XEXP (x, 1)))
9858 *cost += (2 * extra_cost->alu.shift
9859 + extra_cost->alu.log_shift);
9860 else
9861 *cost += (extra_cost->alu.shift
9862 + extra_cost->alu.shift_reg
9863 + extra_cost->alu.log_shift_reg);
9866 return true;
9869 *cost = LIBCALL_COST (2);
9870 return false;
9872 case BSWAP:
9873 if (arm_arch6)
9875 if (mode == SImode)
9877 if (speed_p)
9878 *cost += extra_cost->alu.rev;
9880 return false;
9883 else
9885 /* No rev instruction available. Look at arm_legacy_rev
9886 and thumb_legacy_rev for the form of RTL used then. */
9887 if (TARGET_THUMB)
9889 *cost += COSTS_N_INSNS (9);
9891 if (speed_p)
9893 *cost += 6 * extra_cost->alu.shift;
9894 *cost += 3 * extra_cost->alu.logical;
9897 else
9899 *cost += COSTS_N_INSNS (4);
9901 if (speed_p)
9903 *cost += 2 * extra_cost->alu.shift;
9904 *cost += extra_cost->alu.arith_shift;
9905 *cost += 2 * extra_cost->alu.logical;
9908 return true;
9910 return false;
9912 case MINUS:
9913 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9914 && (mode == SFmode || !TARGET_VFP_SINGLE))
9916 if (GET_CODE (XEXP (x, 0)) == MULT
9917 || GET_CODE (XEXP (x, 1)) == MULT)
9919 rtx mul_op0, mul_op1, sub_op;
9921 if (speed_p)
9922 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9924 if (GET_CODE (XEXP (x, 0)) == MULT)
9926 mul_op0 = XEXP (XEXP (x, 0), 0);
9927 mul_op1 = XEXP (XEXP (x, 0), 1);
9928 sub_op = XEXP (x, 1);
9930 else
9932 mul_op0 = XEXP (XEXP (x, 1), 0);
9933 mul_op1 = XEXP (XEXP (x, 1), 1);
9934 sub_op = XEXP (x, 0);
9937 /* The first operand of the multiply may be optionally
9938 negated. */
9939 if (GET_CODE (mul_op0) == NEG)
9940 mul_op0 = XEXP (mul_op0, 0);
9942 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9943 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9944 + rtx_cost (sub_op, mode, code, 0, speed_p));
9946 return true;
9949 if (speed_p)
9950 *cost += extra_cost->fp[mode != SFmode].addsub;
9951 return false;
9954 if (mode == SImode)
9956 rtx shift_by_reg = NULL;
9957 rtx shift_op;
9958 rtx non_shift_op;
9960 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9961 if (shift_op == NULL)
9963 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9964 non_shift_op = XEXP (x, 0);
9966 else
9967 non_shift_op = XEXP (x, 1);
9969 if (shift_op != NULL)
9971 if (shift_by_reg != NULL)
9973 if (speed_p)
9974 *cost += extra_cost->alu.arith_shift_reg;
9975 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9977 else if (speed_p)
9978 *cost += extra_cost->alu.arith_shift;
9980 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9981 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9982 return true;
9985 if (arm_arch_thumb2
9986 && GET_CODE (XEXP (x, 1)) == MULT)
9988 /* MLS. */
9989 if (speed_p)
9990 *cost += extra_cost->mult[0].add;
9991 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9992 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9993 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9994 return true;
9997 if (CONST_INT_P (XEXP (x, 0)))
9999 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
10000 INTVAL (XEXP (x, 0)), NULL_RTX,
10001 NULL_RTX, 1, 0);
10002 *cost = COSTS_N_INSNS (insns);
10003 if (speed_p)
10004 *cost += insns * extra_cost->alu.arith;
10005 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10006 return true;
10008 else if (speed_p)
10009 *cost += extra_cost->alu.arith;
10011 return false;
10014 if (GET_MODE_CLASS (mode) == MODE_INT
10015 && GET_MODE_SIZE (mode) < 4)
10017 rtx shift_op, shift_reg;
10018 shift_reg = NULL;
10020 /* We check both sides of the MINUS for shifter operands since,
10021 unlike PLUS, it's not commutative. */
10023 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
10024 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
10026 /* Slightly disparage, as we might need to widen the result. */
10027 *cost += 1;
10028 if (speed_p)
10029 *cost += extra_cost->alu.arith;
10031 if (CONST_INT_P (XEXP (x, 0)))
10033 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
10034 return true;
10037 return false;
10040 if (mode == DImode)
10042 *cost += COSTS_N_INSNS (1);
10044 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
10046 rtx op1 = XEXP (x, 1);
10048 if (speed_p)
10049 *cost += 2 * extra_cost->alu.arith;
10051 if (GET_CODE (op1) == ZERO_EXTEND)
10052 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
10053 0, speed_p);
10054 else
10055 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
10056 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10057 0, speed_p);
10058 return true;
10060 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10062 if (speed_p)
10063 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
10064 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
10065 0, speed_p)
10066 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
10067 return true;
10069 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10070 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
10072 if (speed_p)
10073 *cost += (extra_cost->alu.arith
10074 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
10075 ? extra_cost->alu.arith
10076 : extra_cost->alu.arith_shift));
10077 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
10078 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10079 GET_CODE (XEXP (x, 1)), 0, speed_p));
10080 return true;
10083 if (speed_p)
10084 *cost += 2 * extra_cost->alu.arith;
10085 return false;
10088 /* Vector mode? */
10090 *cost = LIBCALL_COST (2);
10091 return false;
10093 case PLUS:
10094 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10095 && (mode == SFmode || !TARGET_VFP_SINGLE))
10097 if (GET_CODE (XEXP (x, 0)) == MULT)
10099 rtx mul_op0, mul_op1, add_op;
10101 if (speed_p)
10102 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
10104 mul_op0 = XEXP (XEXP (x, 0), 0);
10105 mul_op1 = XEXP (XEXP (x, 0), 1);
10106 add_op = XEXP (x, 1);
10108 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
10109 + rtx_cost (mul_op1, mode, code, 0, speed_p)
10110 + rtx_cost (add_op, mode, code, 0, speed_p));
10112 return true;
10115 if (speed_p)
10116 *cost += extra_cost->fp[mode != SFmode].addsub;
10117 return false;
10119 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10121 *cost = LIBCALL_COST (2);
10122 return false;
10125 /* Narrow modes can be synthesized in SImode, but the range
10126 of useful sub-operations is limited. Check for shift operations
10127 on one of the operands. Only left shifts can be used in the
10128 narrow modes. */
10129 if (GET_MODE_CLASS (mode) == MODE_INT
10130 && GET_MODE_SIZE (mode) < 4)
10132 rtx shift_op, shift_reg;
10133 shift_reg = NULL;
10135 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
10137 if (CONST_INT_P (XEXP (x, 1)))
10139 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10140 INTVAL (XEXP (x, 1)), NULL_RTX,
10141 NULL_RTX, 1, 0);
10142 *cost = COSTS_N_INSNS (insns);
10143 if (speed_p)
10144 *cost += insns * extra_cost->alu.arith;
10145 /* Slightly penalize a narrow operation as the result may
10146 need widening. */
10147 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10148 return true;
10151 /* Slightly penalize a narrow operation as the result may
10152 need widening. */
10153 *cost += 1;
10154 if (speed_p)
10155 *cost += extra_cost->alu.arith;
10157 return false;
10160 if (mode == SImode)
10162 rtx shift_op, shift_reg;
10164 if (TARGET_INT_SIMD
10165 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10166 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10168 /* UXTA[BH] or SXTA[BH]. */
10169 if (speed_p)
10170 *cost += extra_cost->alu.extend_arith;
10171 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10172 0, speed_p)
10173 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10174 return true;
10177 shift_reg = NULL;
10178 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10179 if (shift_op != NULL)
10181 if (shift_reg)
10183 if (speed_p)
10184 *cost += extra_cost->alu.arith_shift_reg;
10185 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10187 else if (speed_p)
10188 *cost += extra_cost->alu.arith_shift;
10190 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10191 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10192 return true;
10194 if (GET_CODE (XEXP (x, 0)) == MULT)
10196 rtx mul_op = XEXP (x, 0);
10198 if (TARGET_DSP_MULTIPLY
10199 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10200 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10201 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10202 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10203 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10204 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10205 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10206 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10207 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10208 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10209 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10210 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10211 == 16))))))
10213 /* SMLA[BT][BT]. */
10214 if (speed_p)
10215 *cost += extra_cost->mult[0].extend_add;
10216 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10217 SIGN_EXTEND, 0, speed_p)
10218 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10219 SIGN_EXTEND, 0, speed_p)
10220 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10221 return true;
10224 if (speed_p)
10225 *cost += extra_cost->mult[0].add;
10226 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10227 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10228 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10229 return true;
10231 if (CONST_INT_P (XEXP (x, 1)))
10233 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10234 INTVAL (XEXP (x, 1)), NULL_RTX,
10235 NULL_RTX, 1, 0);
10236 *cost = COSTS_N_INSNS (insns);
10237 if (speed_p)
10238 *cost += insns * extra_cost->alu.arith;
10239 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10240 return true;
10242 else if (speed_p)
10243 *cost += extra_cost->alu.arith;
10245 return false;
10248 if (mode == DImode)
10250 if (arm_arch3m
10251 && GET_CODE (XEXP (x, 0)) == MULT
10252 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10253 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10254 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10255 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10257 if (speed_p)
10258 *cost += extra_cost->mult[1].extend_add;
10259 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10260 ZERO_EXTEND, 0, speed_p)
10261 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10262 ZERO_EXTEND, 0, speed_p)
10263 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10264 return true;
10267 *cost += COSTS_N_INSNS (1);
10269 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10270 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10272 if (speed_p)
10273 *cost += (extra_cost->alu.arith
10274 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10275 ? extra_cost->alu.arith
10276 : extra_cost->alu.arith_shift));
10278 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10279 0, speed_p)
10280 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10281 return true;
10284 if (speed_p)
10285 *cost += 2 * extra_cost->alu.arith;
10286 return false;
10289 /* Vector mode? */
10290 *cost = LIBCALL_COST (2);
10291 return false;
10292 case IOR:
10293 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10295 if (speed_p)
10296 *cost += extra_cost->alu.rev;
10298 return true;
10300 /* Fall through. */
10301 case AND: case XOR:
10302 if (mode == SImode)
10304 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10305 rtx op0 = XEXP (x, 0);
10306 rtx shift_op, shift_reg;
10308 if (subcode == NOT
10309 && (code == AND
10310 || (code == IOR && TARGET_THUMB2)))
10311 op0 = XEXP (op0, 0);
10313 shift_reg = NULL;
10314 shift_op = shifter_op_p (op0, &shift_reg);
10315 if (shift_op != NULL)
10317 if (shift_reg)
10319 if (speed_p)
10320 *cost += extra_cost->alu.log_shift_reg;
10321 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10323 else if (speed_p)
10324 *cost += extra_cost->alu.log_shift;
10326 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10327 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10328 return true;
10331 if (CONST_INT_P (XEXP (x, 1)))
10333 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10334 INTVAL (XEXP (x, 1)), NULL_RTX,
10335 NULL_RTX, 1, 0);
10337 *cost = COSTS_N_INSNS (insns);
10338 if (speed_p)
10339 *cost += insns * extra_cost->alu.logical;
10340 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10341 return true;
10344 if (speed_p)
10345 *cost += extra_cost->alu.logical;
10346 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10347 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10348 return true;
10351 if (mode == DImode)
10353 rtx op0 = XEXP (x, 0);
10354 enum rtx_code subcode = GET_CODE (op0);
10356 *cost += COSTS_N_INSNS (1);
10358 if (subcode == NOT
10359 && (code == AND
10360 || (code == IOR && TARGET_THUMB2)))
10361 op0 = XEXP (op0, 0);
10363 if (GET_CODE (op0) == ZERO_EXTEND)
10365 if (speed_p)
10366 *cost += 2 * extra_cost->alu.logical;
10368 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10369 0, speed_p)
10370 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10371 return true;
10373 else if (GET_CODE (op0) == SIGN_EXTEND)
10375 if (speed_p)
10376 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10378 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10379 0, speed_p)
10380 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10381 return true;
10384 if (speed_p)
10385 *cost += 2 * extra_cost->alu.logical;
10387 return true;
10389 /* Vector mode? */
10391 *cost = LIBCALL_COST (2);
10392 return false;
10394 case MULT:
10395 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10396 && (mode == SFmode || !TARGET_VFP_SINGLE))
10398 rtx op0 = XEXP (x, 0);
10400 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10401 op0 = XEXP (op0, 0);
10403 if (speed_p)
10404 *cost += extra_cost->fp[mode != SFmode].mult;
10406 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10407 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10408 return true;
10410 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10412 *cost = LIBCALL_COST (2);
10413 return false;
10416 if (mode == SImode)
10418 if (TARGET_DSP_MULTIPLY
10419 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10420 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10421 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10422 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10423 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10424 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10425 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10426 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10427 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10428 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10429 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10430 && (INTVAL (XEXP (XEXP (x, 1), 1))
10431 == 16))))))
10433 /* SMUL[TB][TB]. */
10434 if (speed_p)
10435 *cost += extra_cost->mult[0].extend;
10436 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10437 SIGN_EXTEND, 0, speed_p);
10438 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10439 SIGN_EXTEND, 1, speed_p);
10440 return true;
10442 if (speed_p)
10443 *cost += extra_cost->mult[0].simple;
10444 return false;
10447 if (mode == DImode)
10449 if (arm_arch3m
10450 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10451 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10452 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10453 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10455 if (speed_p)
10456 *cost += extra_cost->mult[1].extend;
10457 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10458 ZERO_EXTEND, 0, speed_p)
10459 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10460 ZERO_EXTEND, 0, speed_p));
10461 return true;
10464 *cost = LIBCALL_COST (2);
10465 return false;
10468 /* Vector mode? */
10469 *cost = LIBCALL_COST (2);
10470 return false;
10472 case NEG:
10473 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10474 && (mode == SFmode || !TARGET_VFP_SINGLE))
10476 if (GET_CODE (XEXP (x, 0)) == MULT)
10478 /* VNMUL. */
10479 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10480 return true;
10483 if (speed_p)
10484 *cost += extra_cost->fp[mode != SFmode].neg;
10486 return false;
10488 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10490 *cost = LIBCALL_COST (1);
10491 return false;
10494 if (mode == SImode)
10496 if (GET_CODE (XEXP (x, 0)) == ABS)
10498 *cost += COSTS_N_INSNS (1);
10499 /* Assume the non-flag-changing variant. */
10500 if (speed_p)
10501 *cost += (extra_cost->alu.log_shift
10502 + extra_cost->alu.arith_shift);
10503 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10504 return true;
10507 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10508 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10510 *cost += COSTS_N_INSNS (1);
10511 /* No extra cost for MOV imm and MVN imm. */
10512 /* If the comparison op is using the flags, there's no further
10513 cost, otherwise we need to add the cost of the comparison. */
10514 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10515 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10516 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10518 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10519 *cost += (COSTS_N_INSNS (1)
10520 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10521 0, speed_p)
10522 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10523 1, speed_p));
10524 if (speed_p)
10525 *cost += extra_cost->alu.arith;
10527 return true;
10530 if (speed_p)
10531 *cost += extra_cost->alu.arith;
10532 return false;
10535 if (GET_MODE_CLASS (mode) == MODE_INT
10536 && GET_MODE_SIZE (mode) < 4)
10538 /* Slightly disparage, as we might need an extend operation. */
10539 *cost += 1;
10540 if (speed_p)
10541 *cost += extra_cost->alu.arith;
10542 return false;
10545 if (mode == DImode)
10547 *cost += COSTS_N_INSNS (1);
10548 if (speed_p)
10549 *cost += 2 * extra_cost->alu.arith;
10550 return false;
10553 /* Vector mode? */
10554 *cost = LIBCALL_COST (1);
10555 return false;
10557 case NOT:
10558 if (mode == SImode)
10560 rtx shift_op;
10561 rtx shift_reg = NULL;
10563 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10565 if (shift_op)
10567 if (shift_reg != NULL)
10569 if (speed_p)
10570 *cost += extra_cost->alu.log_shift_reg;
10571 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10573 else if (speed_p)
10574 *cost += extra_cost->alu.log_shift;
10575 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10576 return true;
10579 if (speed_p)
10580 *cost += extra_cost->alu.logical;
10581 return false;
10583 if (mode == DImode)
10585 *cost += COSTS_N_INSNS (1);
10586 return false;
10589 /* Vector mode? */
10591 *cost += LIBCALL_COST (1);
10592 return false;
10594 case IF_THEN_ELSE:
10596 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10598 *cost += COSTS_N_INSNS (3);
10599 return true;
10601 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10602 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10604 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10605 /* Assume that if one arm of the if_then_else is a register,
10606 that it will be tied with the result and eliminate the
10607 conditional insn. */
10608 if (REG_P (XEXP (x, 1)))
10609 *cost += op2cost;
10610 else if (REG_P (XEXP (x, 2)))
10611 *cost += op1cost;
10612 else
10614 if (speed_p)
10616 if (extra_cost->alu.non_exec_costs_exec)
10617 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10618 else
10619 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10621 else
10622 *cost += op1cost + op2cost;
10625 return true;
10627 case COMPARE:
10628 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10629 *cost = 0;
10630 else
10632 machine_mode op0mode;
10633 /* We'll mostly assume that the cost of a compare is the cost of the
10634 LHS. However, there are some notable exceptions. */
10636 /* Floating point compares are never done as side-effects. */
10637 op0mode = GET_MODE (XEXP (x, 0));
10638 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10639 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10641 if (speed_p)
10642 *cost += extra_cost->fp[op0mode != SFmode].compare;
10644 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10646 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10647 return true;
10650 return false;
10652 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10654 *cost = LIBCALL_COST (2);
10655 return false;
10658 /* DImode compares normally take two insns. */
10659 if (op0mode == DImode)
10661 *cost += COSTS_N_INSNS (1);
10662 if (speed_p)
10663 *cost += 2 * extra_cost->alu.arith;
10664 return false;
10667 if (op0mode == SImode)
10669 rtx shift_op;
10670 rtx shift_reg;
10672 if (XEXP (x, 1) == const0_rtx
10673 && !(REG_P (XEXP (x, 0))
10674 || (GET_CODE (XEXP (x, 0)) == SUBREG
10675 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10677 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10679 /* Multiply operations that set the flags are often
10680 significantly more expensive. */
10681 if (speed_p
10682 && GET_CODE (XEXP (x, 0)) == MULT
10683 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10684 *cost += extra_cost->mult[0].flag_setting;
10686 if (speed_p
10687 && GET_CODE (XEXP (x, 0)) == PLUS
10688 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10689 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10690 0), 1), mode))
10691 *cost += extra_cost->mult[0].flag_setting;
10692 return true;
10695 shift_reg = NULL;
10696 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10697 if (shift_op != NULL)
10699 if (shift_reg != NULL)
10701 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10702 1, speed_p);
10703 if (speed_p)
10704 *cost += extra_cost->alu.arith_shift_reg;
10706 else if (speed_p)
10707 *cost += extra_cost->alu.arith_shift;
10708 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10709 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10710 return true;
10713 if (speed_p)
10714 *cost += extra_cost->alu.arith;
10715 if (CONST_INT_P (XEXP (x, 1))
10716 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10718 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10719 return true;
10721 return false;
10724 /* Vector mode? */
10726 *cost = LIBCALL_COST (2);
10727 return false;
10729 return true;
10731 case EQ:
10732 case NE:
10733 case LT:
10734 case LE:
10735 case GT:
10736 case GE:
10737 case LTU:
10738 case LEU:
10739 case GEU:
10740 case GTU:
10741 case ORDERED:
10742 case UNORDERED:
10743 case UNEQ:
10744 case UNLE:
10745 case UNLT:
10746 case UNGE:
10747 case UNGT:
10748 case LTGT:
10749 if (outer_code == SET)
10751 /* Is it a store-flag operation? */
10752 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10753 && XEXP (x, 1) == const0_rtx)
10755 /* Thumb also needs an IT insn. */
10756 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10757 return true;
10759 if (XEXP (x, 1) == const0_rtx)
10761 switch (code)
10763 case LT:
10764 /* LSR Rd, Rn, #31. */
10765 if (speed_p)
10766 *cost += extra_cost->alu.shift;
10767 break;
10769 case EQ:
10770 /* RSBS T1, Rn, #0
10771 ADC Rd, Rn, T1. */
10773 case NE:
10774 /* SUBS T1, Rn, #1
10775 SBC Rd, Rn, T1. */
10776 *cost += COSTS_N_INSNS (1);
10777 break;
10779 case LE:
10780 /* RSBS T1, Rn, Rn, LSR #31
10781 ADC Rd, Rn, T1. */
10782 *cost += COSTS_N_INSNS (1);
10783 if (speed_p)
10784 *cost += extra_cost->alu.arith_shift;
10785 break;
10787 case GT:
10788 /* RSB Rd, Rn, Rn, ASR #1
10789 LSR Rd, Rd, #31. */
10790 *cost += COSTS_N_INSNS (1);
10791 if (speed_p)
10792 *cost += (extra_cost->alu.arith_shift
10793 + extra_cost->alu.shift);
10794 break;
10796 case GE:
10797 /* ASR Rd, Rn, #31
10798 ADD Rd, Rn, #1. */
10799 *cost += COSTS_N_INSNS (1);
10800 if (speed_p)
10801 *cost += extra_cost->alu.shift;
10802 break;
10804 default:
10805 /* Remaining cases are either meaningless or would take
10806 three insns anyway. */
10807 *cost = COSTS_N_INSNS (3);
10808 break;
10810 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10811 return true;
10813 else
10815 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10816 if (CONST_INT_P (XEXP (x, 1))
10817 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10819 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10820 return true;
10823 return false;
10826 /* Not directly inside a set. If it involves the condition code
10827 register it must be the condition for a branch, cond_exec or
10828 I_T_E operation. Since the comparison is performed elsewhere
10829 this is just the control part which has no additional
10830 cost. */
10831 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10832 && XEXP (x, 1) == const0_rtx)
10834 *cost = 0;
10835 return true;
10837 return false;
10839 case ABS:
10840 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10841 && (mode == SFmode || !TARGET_VFP_SINGLE))
10843 if (speed_p)
10844 *cost += extra_cost->fp[mode != SFmode].neg;
10846 return false;
10848 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10850 *cost = LIBCALL_COST (1);
10851 return false;
10854 if (mode == SImode)
10856 if (speed_p)
10857 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10858 return false;
10860 /* Vector mode? */
10861 *cost = LIBCALL_COST (1);
10862 return false;
10864 case SIGN_EXTEND:
10865 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10866 && MEM_P (XEXP (x, 0)))
10868 if (mode == DImode)
10869 *cost += COSTS_N_INSNS (1);
10871 if (!speed_p)
10872 return true;
10874 if (GET_MODE (XEXP (x, 0)) == SImode)
10875 *cost += extra_cost->ldst.load;
10876 else
10877 *cost += extra_cost->ldst.load_sign_extend;
10879 if (mode == DImode)
10880 *cost += extra_cost->alu.shift;
10882 return true;
10885 /* Widening from less than 32-bits requires an extend operation. */
10886 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10888 /* We have SXTB/SXTH. */
10889 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10890 if (speed_p)
10891 *cost += extra_cost->alu.extend;
10893 else if (GET_MODE (XEXP (x, 0)) != SImode)
10895 /* Needs two shifts. */
10896 *cost += COSTS_N_INSNS (1);
10897 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10898 if (speed_p)
10899 *cost += 2 * extra_cost->alu.shift;
10902 /* Widening beyond 32-bits requires one more insn. */
10903 if (mode == DImode)
10905 *cost += COSTS_N_INSNS (1);
10906 if (speed_p)
10907 *cost += extra_cost->alu.shift;
10910 return true;
10912 case ZERO_EXTEND:
10913 if ((arm_arch4
10914 || GET_MODE (XEXP (x, 0)) == SImode
10915 || GET_MODE (XEXP (x, 0)) == QImode)
10916 && MEM_P (XEXP (x, 0)))
10918 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10920 if (mode == DImode)
10921 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10923 return true;
10926 /* Widening from less than 32-bits requires an extend operation. */
10927 if (GET_MODE (XEXP (x, 0)) == QImode)
10929 /* UXTB can be a shorter instruction in Thumb2, but it might
10930 be slower than the AND Rd, Rn, #255 alternative. When
10931 optimizing for speed it should never be slower to use
10932 AND, and we don't really model 16-bit vs 32-bit insns
10933 here. */
10934 if (speed_p)
10935 *cost += extra_cost->alu.logical;
10937 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10939 /* We have UXTB/UXTH. */
10940 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10941 if (speed_p)
10942 *cost += extra_cost->alu.extend;
10944 else if (GET_MODE (XEXP (x, 0)) != SImode)
10946 /* Needs two shifts. It's marginally preferable to use
10947 shifts rather than two BIC instructions as the second
10948 shift may merge with a subsequent insn as a shifter
10949 op. */
10950 *cost = COSTS_N_INSNS (2);
10951 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10952 if (speed_p)
10953 *cost += 2 * extra_cost->alu.shift;
10956 /* Widening beyond 32-bits requires one more insn. */
10957 if (mode == DImode)
10959 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10962 return true;
10964 case CONST_INT:
10965 *cost = 0;
10966 /* CONST_INT has no mode, so we cannot tell for sure how many
10967 insns are really going to be needed. The best we can do is
10968 look at the value passed. If it fits in SImode, then assume
10969 that's the mode it will be used for. Otherwise assume it
10970 will be used in DImode. */
10971 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10972 mode = SImode;
10973 else
10974 mode = DImode;
10976 /* Avoid blowing up in arm_gen_constant (). */
10977 if (!(outer_code == PLUS
10978 || outer_code == AND
10979 || outer_code == IOR
10980 || outer_code == XOR
10981 || outer_code == MINUS))
10982 outer_code = SET;
10984 const_int_cost:
10985 if (mode == SImode)
10987 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10988 INTVAL (x), NULL, NULL,
10989 0, 0));
10990 /* Extra costs? */
10992 else
10994 *cost += COSTS_N_INSNS (arm_gen_constant
10995 (outer_code, SImode, NULL,
10996 trunc_int_for_mode (INTVAL (x), SImode),
10997 NULL, NULL, 0, 0)
10998 + arm_gen_constant (outer_code, SImode, NULL,
10999 INTVAL (x) >> 32, NULL,
11000 NULL, 0, 0));
11001 /* Extra costs? */
11004 return true;
11006 case CONST:
11007 case LABEL_REF:
11008 case SYMBOL_REF:
11009 if (speed_p)
11011 if (arm_arch_thumb2 && !flag_pic)
11012 *cost += COSTS_N_INSNS (1);
11013 else
11014 *cost += extra_cost->ldst.load;
11016 else
11017 *cost += COSTS_N_INSNS (1);
11019 if (flag_pic)
11021 *cost += COSTS_N_INSNS (1);
11022 if (speed_p)
11023 *cost += extra_cost->alu.arith;
11026 return true;
11028 case CONST_FIXED:
11029 *cost = COSTS_N_INSNS (4);
11030 /* Fixme. */
11031 return true;
11033 case CONST_DOUBLE:
11034 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
11035 && (mode == SFmode || !TARGET_VFP_SINGLE))
11037 if (vfp3_const_double_rtx (x))
11039 if (speed_p)
11040 *cost += extra_cost->fp[mode == DFmode].fpconst;
11041 return true;
11044 if (speed_p)
11046 if (mode == DFmode)
11047 *cost += extra_cost->ldst.loadd;
11048 else
11049 *cost += extra_cost->ldst.loadf;
11051 else
11052 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
11054 return true;
11056 *cost = COSTS_N_INSNS (4);
11057 return true;
11059 case CONST_VECTOR:
11060 /* Fixme. */
11061 if (TARGET_NEON
11062 && TARGET_HARD_FLOAT
11063 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
11064 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
11065 *cost = COSTS_N_INSNS (1);
11066 else
11067 *cost = COSTS_N_INSNS (4);
11068 return true;
11070 case HIGH:
11071 case LO_SUM:
11072 /* When optimizing for size, we prefer constant pool entries to
11073 MOVW/MOVT pairs, so bump the cost of these slightly. */
11074 if (!speed_p)
11075 *cost += 1;
11076 return true;
11078 case CLZ:
11079 if (speed_p)
11080 *cost += extra_cost->alu.clz;
11081 return false;
11083 case SMIN:
11084 if (XEXP (x, 1) == const0_rtx)
11086 if (speed_p)
11087 *cost += extra_cost->alu.log_shift;
11088 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11089 return true;
11091 /* Fall through. */
11092 case SMAX:
11093 case UMIN:
11094 case UMAX:
11095 *cost += COSTS_N_INSNS (1);
11096 return false;
11098 case TRUNCATE:
11099 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
11100 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11101 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
11102 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11103 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
11104 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
11105 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
11106 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
11107 == ZERO_EXTEND))))
11109 if (speed_p)
11110 *cost += extra_cost->mult[1].extend;
11111 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
11112 ZERO_EXTEND, 0, speed_p)
11113 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
11114 ZERO_EXTEND, 0, speed_p));
11115 return true;
11117 *cost = LIBCALL_COST (1);
11118 return false;
11120 case UNSPEC_VOLATILE:
11121 case UNSPEC:
11122 return arm_unspec_cost (x, outer_code, speed_p, cost);
11124 case PC:
11125 /* Reading the PC is like reading any other register. Writing it
11126 is more expensive, but we take that into account elsewhere. */
11127 *cost = 0;
11128 return true;
11130 case ZERO_EXTRACT:
11131 /* TODO: Simple zero_extract of bottom bits using AND. */
11132 /* Fall through. */
11133 case SIGN_EXTRACT:
11134 if (arm_arch6
11135 && mode == SImode
11136 && CONST_INT_P (XEXP (x, 1))
11137 && CONST_INT_P (XEXP (x, 2)))
11139 if (speed_p)
11140 *cost += extra_cost->alu.bfx;
11141 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11142 return true;
11144 /* Without UBFX/SBFX, need to resort to shift operations. */
11145 *cost += COSTS_N_INSNS (1);
11146 if (speed_p)
11147 *cost += 2 * extra_cost->alu.shift;
11148 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
11149 return true;
11151 case FLOAT_EXTEND:
11152 if (TARGET_HARD_FLOAT)
11154 if (speed_p)
11155 *cost += extra_cost->fp[mode == DFmode].widen;
11156 if (!TARGET_FPU_ARMV8
11157 && GET_MODE (XEXP (x, 0)) == HFmode)
11159 /* Pre v8, widening HF->DF is a two-step process, first
11160 widening to SFmode. */
11161 *cost += COSTS_N_INSNS (1);
11162 if (speed_p)
11163 *cost += extra_cost->fp[0].widen;
11165 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11166 return true;
11169 *cost = LIBCALL_COST (1);
11170 return false;
11172 case FLOAT_TRUNCATE:
11173 if (TARGET_HARD_FLOAT)
11175 if (speed_p)
11176 *cost += extra_cost->fp[mode == DFmode].narrow;
11177 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11178 return true;
11179 /* Vector modes? */
11181 *cost = LIBCALL_COST (1);
11182 return false;
11184 case FMA:
11185 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11187 rtx op0 = XEXP (x, 0);
11188 rtx op1 = XEXP (x, 1);
11189 rtx op2 = XEXP (x, 2);
11192 /* vfms or vfnma. */
11193 if (GET_CODE (op0) == NEG)
11194 op0 = XEXP (op0, 0);
11196 /* vfnms or vfnma. */
11197 if (GET_CODE (op2) == NEG)
11198 op2 = XEXP (op2, 0);
11200 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11201 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11202 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11204 if (speed_p)
11205 *cost += extra_cost->fp[mode ==DFmode].fma;
11207 return true;
11210 *cost = LIBCALL_COST (3);
11211 return false;
11213 case FIX:
11214 case UNSIGNED_FIX:
11215 if (TARGET_HARD_FLOAT)
11217 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11218 a vcvt fixed-point conversion. */
11219 if (code == FIX && mode == SImode
11220 && GET_CODE (XEXP (x, 0)) == FIX
11221 && GET_MODE (XEXP (x, 0)) == SFmode
11222 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11223 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11224 > 0)
11226 if (speed_p)
11227 *cost += extra_cost->fp[0].toint;
11229 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11230 code, 0, speed_p);
11231 return true;
11234 if (GET_MODE_CLASS (mode) == MODE_INT)
11236 mode = GET_MODE (XEXP (x, 0));
11237 if (speed_p)
11238 *cost += extra_cost->fp[mode == DFmode].toint;
11239 /* Strip of the 'cost' of rounding towards zero. */
11240 if (GET_CODE (XEXP (x, 0)) == FIX)
11241 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11242 0, speed_p);
11243 else
11244 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11245 /* ??? Increase the cost to deal with transferring from
11246 FP -> CORE registers? */
11247 return true;
11249 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11250 && TARGET_FPU_ARMV8)
11252 if (speed_p)
11253 *cost += extra_cost->fp[mode == DFmode].roundint;
11254 return false;
11256 /* Vector costs? */
11258 *cost = LIBCALL_COST (1);
11259 return false;
11261 case FLOAT:
11262 case UNSIGNED_FLOAT:
11263 if (TARGET_HARD_FLOAT)
11265 /* ??? Increase the cost to deal with transferring from CORE
11266 -> FP registers? */
11267 if (speed_p)
11268 *cost += extra_cost->fp[mode == DFmode].fromint;
11269 return false;
11271 *cost = LIBCALL_COST (1);
11272 return false;
11274 case CALL:
11275 return true;
11277 case ASM_OPERANDS:
11279 /* Just a guess. Guess number of instructions in the asm
11280 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11281 though (see PR60663). */
11282 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11283 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11285 *cost = COSTS_N_INSNS (asm_length + num_operands);
11286 return true;
11288 default:
11289 if (mode != VOIDmode)
11290 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11291 else
11292 *cost = COSTS_N_INSNS (4); /* Who knows? */
11293 return false;
11297 #undef HANDLE_NARROW_SHIFT_ARITH
11299 /* RTX costs when optimizing for size. */
11300 static bool
11301 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11302 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11304 bool result;
11305 int code = GET_CODE (x);
11307 if (TARGET_OLD_RTX_COSTS
11308 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11310 /* Old way. (Deprecated.) */
11311 if (!speed)
11312 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11313 (enum rtx_code) outer_code, total);
11314 else
11315 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11316 (enum rtx_code) outer_code, total,
11317 speed);
11319 else
11321 /* New way. */
11322 if (current_tune->insn_extra_cost)
11323 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11324 (enum rtx_code) outer_code,
11325 current_tune->insn_extra_cost,
11326 total, speed);
11327 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11328 && current_tune->insn_extra_cost != NULL */
11329 else
11330 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11331 (enum rtx_code) outer_code,
11332 &generic_extra_costs, total, speed);
11335 if (dump_file && (dump_flags & TDF_DETAILS))
11337 print_rtl_single (dump_file, x);
11338 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11339 *total, result ? "final" : "partial");
11341 return result;
11344 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11345 supported on any "slowmul" cores, so it can be ignored. */
11347 static bool
11348 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11349 int *total, bool speed)
11351 machine_mode mode = GET_MODE (x);
11353 if (TARGET_THUMB)
11355 *total = thumb1_rtx_costs (x, code, outer_code);
11356 return true;
11359 switch (code)
11361 case MULT:
11362 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11363 || mode == DImode)
11365 *total = COSTS_N_INSNS (20);
11366 return false;
11369 if (CONST_INT_P (XEXP (x, 1)))
11371 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11372 & (unsigned HOST_WIDE_INT) 0xffffffff);
11373 int cost, const_ok = const_ok_for_arm (i);
11374 int j, booth_unit_size;
11376 /* Tune as appropriate. */
11377 cost = const_ok ? 4 : 8;
11378 booth_unit_size = 2;
11379 for (j = 0; i && j < 32; j += booth_unit_size)
11381 i >>= booth_unit_size;
11382 cost++;
11385 *total = COSTS_N_INSNS (cost);
11386 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
11387 return true;
11390 *total = COSTS_N_INSNS (20);
11391 return false;
11393 default:
11394 return arm_rtx_costs_1 (x, outer_code, total, speed);;
11399 /* RTX cost for cores with a fast multiply unit (M variants). */
11401 static bool
11402 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11403 int *total, bool speed)
11405 machine_mode mode = GET_MODE (x);
11407 if (TARGET_THUMB1)
11409 *total = thumb1_rtx_costs (x, code, outer_code);
11410 return true;
11413 /* ??? should thumb2 use different costs? */
11414 switch (code)
11416 case MULT:
11417 /* There is no point basing this on the tuning, since it is always the
11418 fast variant if it exists at all. */
11419 if (mode == DImode
11420 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11421 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11422 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11424 *total = COSTS_N_INSNS(2);
11425 return false;
11429 if (mode == DImode)
11431 *total = COSTS_N_INSNS (5);
11432 return false;
11435 if (CONST_INT_P (XEXP (x, 1)))
11437 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11438 & (unsigned HOST_WIDE_INT) 0xffffffff);
11439 int cost, const_ok = const_ok_for_arm (i);
11440 int j, booth_unit_size;
11442 /* Tune as appropriate. */
11443 cost = const_ok ? 4 : 8;
11444 booth_unit_size = 8;
11445 for (j = 0; i && j < 32; j += booth_unit_size)
11447 i >>= booth_unit_size;
11448 cost++;
11451 *total = COSTS_N_INSNS(cost);
11452 return false;
11455 if (mode == SImode)
11457 *total = COSTS_N_INSNS (4);
11458 return false;
11461 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11463 if (TARGET_HARD_FLOAT
11464 && (mode == SFmode
11465 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11467 *total = COSTS_N_INSNS (1);
11468 return false;
11472 /* Requires a lib call */
11473 *total = COSTS_N_INSNS (20);
11474 return false;
11476 default:
11477 return arm_rtx_costs_1 (x, outer_code, total, speed);
11482 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11483 so it can be ignored. */
11485 static bool
11486 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11487 int *total, bool speed)
11489 machine_mode mode = GET_MODE (x);
11491 if (TARGET_THUMB)
11493 *total = thumb1_rtx_costs (x, code, outer_code);
11494 return true;
11497 switch (code)
11499 case COMPARE:
11500 if (GET_CODE (XEXP (x, 0)) != MULT)
11501 return arm_rtx_costs_1 (x, outer_code, total, speed);
11503 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11504 will stall until the multiplication is complete. */
11505 *total = COSTS_N_INSNS (3);
11506 return false;
11508 case MULT:
11509 /* There is no point basing this on the tuning, since it is always the
11510 fast variant if it exists at all. */
11511 if (mode == DImode
11512 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11513 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11514 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11516 *total = COSTS_N_INSNS (2);
11517 return false;
11521 if (mode == DImode)
11523 *total = COSTS_N_INSNS (5);
11524 return false;
11527 if (CONST_INT_P (XEXP (x, 1)))
11529 /* If operand 1 is a constant we can more accurately
11530 calculate the cost of the multiply. The multiplier can
11531 retire 15 bits on the first cycle and a further 12 on the
11532 second. We do, of course, have to load the constant into
11533 a register first. */
11534 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11535 /* There's a general overhead of one cycle. */
11536 int cost = 1;
11537 unsigned HOST_WIDE_INT masked_const;
11539 if (i & 0x80000000)
11540 i = ~i;
11542 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11544 masked_const = i & 0xffff8000;
11545 if (masked_const != 0)
11547 cost++;
11548 masked_const = i & 0xf8000000;
11549 if (masked_const != 0)
11550 cost++;
11552 *total = COSTS_N_INSNS (cost);
11553 return false;
11556 if (mode == SImode)
11558 *total = COSTS_N_INSNS (3);
11559 return false;
11562 /* Requires a lib call */
11563 *total = COSTS_N_INSNS (20);
11564 return false;
11566 default:
11567 return arm_rtx_costs_1 (x, outer_code, total, speed);
11572 /* RTX costs for 9e (and later) cores. */
11574 static bool
11575 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11576 int *total, bool speed)
11578 machine_mode mode = GET_MODE (x);
11580 if (TARGET_THUMB1)
11582 switch (code)
11584 case MULT:
11585 /* Small multiply: 32 cycles for an integer multiply inst. */
11586 if (arm_arch6m && arm_m_profile_small_mul)
11587 *total = COSTS_N_INSNS (32);
11588 else
11589 *total = COSTS_N_INSNS (3);
11590 return true;
11592 default:
11593 *total = thumb1_rtx_costs (x, code, outer_code);
11594 return true;
11598 switch (code)
11600 case MULT:
11601 /* There is no point basing this on the tuning, since it is always the
11602 fast variant if it exists at all. */
11603 if (mode == DImode
11604 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11605 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11606 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11608 *total = COSTS_N_INSNS (2);
11609 return false;
11613 if (mode == DImode)
11615 *total = COSTS_N_INSNS (5);
11616 return false;
11619 if (mode == SImode)
11621 *total = COSTS_N_INSNS (2);
11622 return false;
11625 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11627 if (TARGET_HARD_FLOAT
11628 && (mode == SFmode
11629 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11631 *total = COSTS_N_INSNS (1);
11632 return false;
11636 *total = COSTS_N_INSNS (20);
11637 return false;
11639 default:
11640 return arm_rtx_costs_1 (x, outer_code, total, speed);
11643 /* All address computations that can be done are free, but rtx cost returns
11644 the same for practically all of them. So we weight the different types
11645 of address here in the order (most pref first):
11646 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11647 static inline int
11648 arm_arm_address_cost (rtx x)
11650 enum rtx_code c = GET_CODE (x);
11652 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11653 return 0;
11654 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11655 return 10;
11657 if (c == PLUS)
11659 if (CONST_INT_P (XEXP (x, 1)))
11660 return 2;
11662 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11663 return 3;
11665 return 4;
11668 return 6;
11671 static inline int
11672 arm_thumb_address_cost (rtx x)
11674 enum rtx_code c = GET_CODE (x);
11676 if (c == REG)
11677 return 1;
11678 if (c == PLUS
11679 && REG_P (XEXP (x, 0))
11680 && CONST_INT_P (XEXP (x, 1)))
11681 return 1;
11683 return 2;
11686 static int
11687 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11688 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11690 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11693 /* Adjust cost hook for XScale. */
11694 static bool
11695 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11696 int * cost)
11698 /* Some true dependencies can have a higher cost depending
11699 on precisely how certain input operands are used. */
11700 if (dep_type == 0
11701 && recog_memoized (insn) >= 0
11702 && recog_memoized (dep) >= 0)
11704 int shift_opnum = get_attr_shift (insn);
11705 enum attr_type attr_type = get_attr_type (dep);
11707 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11708 operand for INSN. If we have a shifted input operand and the
11709 instruction we depend on is another ALU instruction, then we may
11710 have to account for an additional stall. */
11711 if (shift_opnum != 0
11712 && (attr_type == TYPE_ALU_SHIFT_IMM
11713 || attr_type == TYPE_ALUS_SHIFT_IMM
11714 || attr_type == TYPE_LOGIC_SHIFT_IMM
11715 || attr_type == TYPE_LOGICS_SHIFT_IMM
11716 || attr_type == TYPE_ALU_SHIFT_REG
11717 || attr_type == TYPE_ALUS_SHIFT_REG
11718 || attr_type == TYPE_LOGIC_SHIFT_REG
11719 || attr_type == TYPE_LOGICS_SHIFT_REG
11720 || attr_type == TYPE_MOV_SHIFT
11721 || attr_type == TYPE_MVN_SHIFT
11722 || attr_type == TYPE_MOV_SHIFT_REG
11723 || attr_type == TYPE_MVN_SHIFT_REG))
11725 rtx shifted_operand;
11726 int opno;
11728 /* Get the shifted operand. */
11729 extract_insn (insn);
11730 shifted_operand = recog_data.operand[shift_opnum];
11732 /* Iterate over all the operands in DEP. If we write an operand
11733 that overlaps with SHIFTED_OPERAND, then we have increase the
11734 cost of this dependency. */
11735 extract_insn (dep);
11736 preprocess_constraints (dep);
11737 for (opno = 0; opno < recog_data.n_operands; opno++)
11739 /* We can ignore strict inputs. */
11740 if (recog_data.operand_type[opno] == OP_IN)
11741 continue;
11743 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11744 shifted_operand))
11746 *cost = 2;
11747 return false;
11752 return true;
11755 /* Adjust cost hook for Cortex A9. */
11756 static bool
11757 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11758 int * cost)
11760 switch (dep_type)
11762 case REG_DEP_ANTI:
11763 *cost = 0;
11764 return false;
11766 case REG_DEP_TRUE:
11767 case REG_DEP_OUTPUT:
11768 if (recog_memoized (insn) >= 0
11769 && recog_memoized (dep) >= 0)
11771 if (GET_CODE (PATTERN (insn)) == SET)
11773 if (GET_MODE_CLASS
11774 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11775 || GET_MODE_CLASS
11776 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11778 enum attr_type attr_type_insn = get_attr_type (insn);
11779 enum attr_type attr_type_dep = get_attr_type (dep);
11781 /* By default all dependencies of the form
11782 s0 = s0 <op> s1
11783 s0 = s0 <op> s2
11784 have an extra latency of 1 cycle because
11785 of the input and output dependency in this
11786 case. However this gets modeled as an true
11787 dependency and hence all these checks. */
11788 if (REG_P (SET_DEST (PATTERN (insn)))
11789 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11791 /* FMACS is a special case where the dependent
11792 instruction can be issued 3 cycles before
11793 the normal latency in case of an output
11794 dependency. */
11795 if ((attr_type_insn == TYPE_FMACS
11796 || attr_type_insn == TYPE_FMACD)
11797 && (attr_type_dep == TYPE_FMACS
11798 || attr_type_dep == TYPE_FMACD))
11800 if (dep_type == REG_DEP_OUTPUT)
11801 *cost = insn_default_latency (dep) - 3;
11802 else
11803 *cost = insn_default_latency (dep);
11804 return false;
11806 else
11808 if (dep_type == REG_DEP_OUTPUT)
11809 *cost = insn_default_latency (dep) + 1;
11810 else
11811 *cost = insn_default_latency (dep);
11813 return false;
11818 break;
11820 default:
11821 gcc_unreachable ();
11824 return true;
11827 /* Adjust cost hook for FA726TE. */
11828 static bool
11829 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11830 int * cost)
11832 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11833 have penalty of 3. */
11834 if (dep_type == REG_DEP_TRUE
11835 && recog_memoized (insn) >= 0
11836 && recog_memoized (dep) >= 0
11837 && get_attr_conds (dep) == CONDS_SET)
11839 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11840 if (get_attr_conds (insn) == CONDS_USE
11841 && get_attr_type (insn) != TYPE_BRANCH)
11843 *cost = 3;
11844 return false;
11847 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11848 || get_attr_conds (insn) == CONDS_USE)
11850 *cost = 0;
11851 return false;
11855 return true;
11858 /* Implement TARGET_REGISTER_MOVE_COST.
11860 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11861 it is typically more expensive than a single memory access. We set
11862 the cost to less than two memory accesses so that floating
11863 point to integer conversion does not go through memory. */
11866 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11867 reg_class_t from, reg_class_t to)
11869 if (TARGET_32BIT)
11871 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11872 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11873 return 15;
11874 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11875 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11876 return 4;
11877 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11878 return 20;
11879 else
11880 return 2;
11882 else
11884 if (from == HI_REGS || to == HI_REGS)
11885 return 4;
11886 else
11887 return 2;
11891 /* Implement TARGET_MEMORY_MOVE_COST. */
11894 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11895 bool in ATTRIBUTE_UNUSED)
11897 if (TARGET_32BIT)
11898 return 10;
11899 else
11901 if (GET_MODE_SIZE (mode) < 4)
11902 return 8;
11903 else
11904 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11908 /* Vectorizer cost model implementation. */
11910 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11911 static int
11912 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11913 tree vectype,
11914 int misalign ATTRIBUTE_UNUSED)
11916 unsigned elements;
11918 switch (type_of_cost)
11920 case scalar_stmt:
11921 return current_tune->vec_costs->scalar_stmt_cost;
11923 case scalar_load:
11924 return current_tune->vec_costs->scalar_load_cost;
11926 case scalar_store:
11927 return current_tune->vec_costs->scalar_store_cost;
11929 case vector_stmt:
11930 return current_tune->vec_costs->vec_stmt_cost;
11932 case vector_load:
11933 return current_tune->vec_costs->vec_align_load_cost;
11935 case vector_store:
11936 return current_tune->vec_costs->vec_store_cost;
11938 case vec_to_scalar:
11939 return current_tune->vec_costs->vec_to_scalar_cost;
11941 case scalar_to_vec:
11942 return current_tune->vec_costs->scalar_to_vec_cost;
11944 case unaligned_load:
11945 return current_tune->vec_costs->vec_unalign_load_cost;
11947 case unaligned_store:
11948 return current_tune->vec_costs->vec_unalign_store_cost;
11950 case cond_branch_taken:
11951 return current_tune->vec_costs->cond_taken_branch_cost;
11953 case cond_branch_not_taken:
11954 return current_tune->vec_costs->cond_not_taken_branch_cost;
11956 case vec_perm:
11957 case vec_promote_demote:
11958 return current_tune->vec_costs->vec_stmt_cost;
11960 case vec_construct:
11961 elements = TYPE_VECTOR_SUBPARTS (vectype);
11962 return elements / 2 + 1;
11964 default:
11965 gcc_unreachable ();
11969 /* Implement targetm.vectorize.add_stmt_cost. */
11971 static unsigned
11972 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11973 struct _stmt_vec_info *stmt_info, int misalign,
11974 enum vect_cost_model_location where)
11976 unsigned *cost = (unsigned *) data;
11977 unsigned retval = 0;
11979 if (flag_vect_cost_model)
11981 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11982 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11984 /* Statements in an inner loop relative to the loop being
11985 vectorized are weighted more heavily. The value here is
11986 arbitrary and could potentially be improved with analysis. */
11987 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11988 count *= 50; /* FIXME. */
11990 retval = (unsigned) (count * stmt_cost);
11991 cost[where] += retval;
11994 return retval;
11997 /* Return true if and only if this insn can dual-issue only as older. */
11998 static bool
11999 cortexa7_older_only (rtx_insn *insn)
12001 if (recog_memoized (insn) < 0)
12002 return false;
12004 switch (get_attr_type (insn))
12006 case TYPE_ALU_DSP_REG:
12007 case TYPE_ALU_SREG:
12008 case TYPE_ALUS_SREG:
12009 case TYPE_LOGIC_REG:
12010 case TYPE_LOGICS_REG:
12011 case TYPE_ADC_REG:
12012 case TYPE_ADCS_REG:
12013 case TYPE_ADR:
12014 case TYPE_BFM:
12015 case TYPE_REV:
12016 case TYPE_MVN_REG:
12017 case TYPE_SHIFT_IMM:
12018 case TYPE_SHIFT_REG:
12019 case TYPE_LOAD_BYTE:
12020 case TYPE_LOAD1:
12021 case TYPE_STORE1:
12022 case TYPE_FFARITHS:
12023 case TYPE_FADDS:
12024 case TYPE_FFARITHD:
12025 case TYPE_FADDD:
12026 case TYPE_FMOV:
12027 case TYPE_F_CVT:
12028 case TYPE_FCMPS:
12029 case TYPE_FCMPD:
12030 case TYPE_FCONSTS:
12031 case TYPE_FCONSTD:
12032 case TYPE_FMULS:
12033 case TYPE_FMACS:
12034 case TYPE_FMULD:
12035 case TYPE_FMACD:
12036 case TYPE_FDIVS:
12037 case TYPE_FDIVD:
12038 case TYPE_F_MRC:
12039 case TYPE_F_MRRC:
12040 case TYPE_F_FLAG:
12041 case TYPE_F_LOADS:
12042 case TYPE_F_STORES:
12043 return true;
12044 default:
12045 return false;
12049 /* Return true if and only if this insn can dual-issue as younger. */
12050 static bool
12051 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
12053 if (recog_memoized (insn) < 0)
12055 if (verbose > 5)
12056 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
12057 return false;
12060 switch (get_attr_type (insn))
12062 case TYPE_ALU_IMM:
12063 case TYPE_ALUS_IMM:
12064 case TYPE_LOGIC_IMM:
12065 case TYPE_LOGICS_IMM:
12066 case TYPE_EXTEND:
12067 case TYPE_MVN_IMM:
12068 case TYPE_MOV_IMM:
12069 case TYPE_MOV_REG:
12070 case TYPE_MOV_SHIFT:
12071 case TYPE_MOV_SHIFT_REG:
12072 case TYPE_BRANCH:
12073 case TYPE_CALL:
12074 return true;
12075 default:
12076 return false;
12081 /* Look for an instruction that can dual issue only as an older
12082 instruction, and move it in front of any instructions that can
12083 dual-issue as younger, while preserving the relative order of all
12084 other instructions in the ready list. This is a hueuristic to help
12085 dual-issue in later cycles, by postponing issue of more flexible
12086 instructions. This heuristic may affect dual issue opportunities
12087 in the current cycle. */
12088 static void
12089 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
12090 int *n_readyp, int clock)
12092 int i;
12093 int first_older_only = -1, first_younger = -1;
12095 if (verbose > 5)
12096 fprintf (file,
12097 ";; sched_reorder for cycle %d with %d insns in ready list\n",
12098 clock,
12099 *n_readyp);
12101 /* Traverse the ready list from the head (the instruction to issue
12102 first), and looking for the first instruction that can issue as
12103 younger and the first instruction that can dual-issue only as
12104 older. */
12105 for (i = *n_readyp - 1; i >= 0; i--)
12107 rtx_insn *insn = ready[i];
12108 if (cortexa7_older_only (insn))
12110 first_older_only = i;
12111 if (verbose > 5)
12112 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
12113 break;
12115 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
12116 first_younger = i;
12119 /* Nothing to reorder because either no younger insn found or insn
12120 that can dual-issue only as older appears before any insn that
12121 can dual-issue as younger. */
12122 if (first_younger == -1)
12124 if (verbose > 5)
12125 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
12126 return;
12129 /* Nothing to reorder because no older-only insn in the ready list. */
12130 if (first_older_only == -1)
12132 if (verbose > 5)
12133 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
12134 return;
12137 /* Move first_older_only insn before first_younger. */
12138 if (verbose > 5)
12139 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
12140 INSN_UID(ready [first_older_only]),
12141 INSN_UID(ready [first_younger]));
12142 rtx_insn *first_older_only_insn = ready [first_older_only];
12143 for (i = first_older_only; i < first_younger; i++)
12145 ready[i] = ready[i+1];
12148 ready[i] = first_older_only_insn;
12149 return;
12152 /* Implement TARGET_SCHED_REORDER. */
12153 static int
12154 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
12155 int clock)
12157 switch (arm_tune)
12159 case cortexa7:
12160 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
12161 break;
12162 default:
12163 /* Do nothing for other cores. */
12164 break;
12167 return arm_issue_rate ();
12170 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12171 It corrects the value of COST based on the relationship between
12172 INSN and DEP through the dependence LINK. It returns the new
12173 value. There is a per-core adjust_cost hook to adjust scheduler costs
12174 and the per-core hook can choose to completely override the generic
12175 adjust_cost function. Only put bits of code into arm_adjust_cost that
12176 are common across all cores. */
12177 static int
12178 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
12179 unsigned int)
12181 rtx i_pat, d_pat;
12183 /* When generating Thumb-1 code, we want to place flag-setting operations
12184 close to a conditional branch which depends on them, so that we can
12185 omit the comparison. */
12186 if (TARGET_THUMB1
12187 && dep_type == 0
12188 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12189 && recog_memoized (dep) >= 0
12190 && get_attr_conds (dep) == CONDS_SET)
12191 return 0;
12193 if (current_tune->sched_adjust_cost != NULL)
12195 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
12196 return cost;
12199 /* XXX Is this strictly true? */
12200 if (dep_type == REG_DEP_ANTI
12201 || dep_type == REG_DEP_OUTPUT)
12202 return 0;
12204 /* Call insns don't incur a stall, even if they follow a load. */
12205 if (dep_type == 0
12206 && CALL_P (insn))
12207 return 1;
12209 if ((i_pat = single_set (insn)) != NULL
12210 && MEM_P (SET_SRC (i_pat))
12211 && (d_pat = single_set (dep)) != NULL
12212 && MEM_P (SET_DEST (d_pat)))
12214 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12215 /* This is a load after a store, there is no conflict if the load reads
12216 from a cached area. Assume that loads from the stack, and from the
12217 constant pool are cached, and that others will miss. This is a
12218 hack. */
12220 if ((GET_CODE (src_mem) == SYMBOL_REF
12221 && CONSTANT_POOL_ADDRESS_P (src_mem))
12222 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12223 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12224 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12225 return 1;
12228 return cost;
12232 arm_max_conditional_execute (void)
12234 return max_insns_skipped;
12237 static int
12238 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12240 if (TARGET_32BIT)
12241 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12242 else
12243 return (optimize > 0) ? 2 : 0;
12246 static int
12247 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12249 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12252 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12253 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12254 sequences of non-executed instructions in IT blocks probably take the same
12255 amount of time as executed instructions (and the IT instruction itself takes
12256 space in icache). This function was experimentally determined to give good
12257 results on a popular embedded benchmark. */
12259 static int
12260 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12262 return (TARGET_32BIT && speed_p) ? 1
12263 : arm_default_branch_cost (speed_p, predictable_p);
12266 static int
12267 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12269 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12272 static bool fp_consts_inited = false;
12274 static REAL_VALUE_TYPE value_fp0;
12276 static void
12277 init_fp_table (void)
12279 REAL_VALUE_TYPE r;
12281 r = REAL_VALUE_ATOF ("0", DFmode);
12282 value_fp0 = r;
12283 fp_consts_inited = true;
12286 /* Return TRUE if rtx X is a valid immediate FP constant. */
12288 arm_const_double_rtx (rtx x)
12290 const REAL_VALUE_TYPE *r;
12292 if (!fp_consts_inited)
12293 init_fp_table ();
12295 r = CONST_DOUBLE_REAL_VALUE (x);
12296 if (REAL_VALUE_MINUS_ZERO (*r))
12297 return 0;
12299 if (real_equal (r, &value_fp0))
12300 return 1;
12302 return 0;
12305 /* VFPv3 has a fairly wide range of representable immediates, formed from
12306 "quarter-precision" floating-point values. These can be evaluated using this
12307 formula (with ^ for exponentiation):
12309 -1^s * n * 2^-r
12311 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12312 16 <= n <= 31 and 0 <= r <= 7.
12314 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12316 - A (most-significant) is the sign bit.
12317 - BCD are the exponent (encoded as r XOR 3).
12318 - EFGH are the mantissa (encoded as n - 16).
12321 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12322 fconst[sd] instruction, or -1 if X isn't suitable. */
12323 static int
12324 vfp3_const_double_index (rtx x)
12326 REAL_VALUE_TYPE r, m;
12327 int sign, exponent;
12328 unsigned HOST_WIDE_INT mantissa, mant_hi;
12329 unsigned HOST_WIDE_INT mask;
12330 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12331 bool fail;
12333 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12334 return -1;
12336 r = *CONST_DOUBLE_REAL_VALUE (x);
12338 /* We can't represent these things, so detect them first. */
12339 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12340 return -1;
12342 /* Extract sign, exponent and mantissa. */
12343 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12344 r = real_value_abs (&r);
12345 exponent = REAL_EXP (&r);
12346 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12347 highest (sign) bit, with a fixed binary point at bit point_pos.
12348 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12349 bits for the mantissa, this may fail (low bits would be lost). */
12350 real_ldexp (&m, &r, point_pos - exponent);
12351 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12352 mantissa = w.elt (0);
12353 mant_hi = w.elt (1);
12355 /* If there are bits set in the low part of the mantissa, we can't
12356 represent this value. */
12357 if (mantissa != 0)
12358 return -1;
12360 /* Now make it so that mantissa contains the most-significant bits, and move
12361 the point_pos to indicate that the least-significant bits have been
12362 discarded. */
12363 point_pos -= HOST_BITS_PER_WIDE_INT;
12364 mantissa = mant_hi;
12366 /* We can permit four significant bits of mantissa only, plus a high bit
12367 which is always 1. */
12368 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
12369 if ((mantissa & mask) != 0)
12370 return -1;
12372 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12373 mantissa >>= point_pos - 5;
12375 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12376 floating-point immediate zero with Neon using an integer-zero load, but
12377 that case is handled elsewhere.) */
12378 if (mantissa == 0)
12379 return -1;
12381 gcc_assert (mantissa >= 16 && mantissa <= 31);
12383 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12384 normalized significands are in the range [1, 2). (Our mantissa is shifted
12385 left 4 places at this point relative to normalized IEEE754 values). GCC
12386 internally uses [0.5, 1) (see real.c), so the exponent returned from
12387 REAL_EXP must be altered. */
12388 exponent = 5 - exponent;
12390 if (exponent < 0 || exponent > 7)
12391 return -1;
12393 /* Sign, mantissa and exponent are now in the correct form to plug into the
12394 formula described in the comment above. */
12395 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12398 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12400 vfp3_const_double_rtx (rtx x)
12402 if (!TARGET_VFP3)
12403 return 0;
12405 return vfp3_const_double_index (x) != -1;
12408 /* Recognize immediates which can be used in various Neon instructions. Legal
12409 immediates are described by the following table (for VMVN variants, the
12410 bitwise inverse of the constant shown is recognized. In either case, VMOV
12411 is output and the correct instruction to use for a given constant is chosen
12412 by the assembler). The constant shown is replicated across all elements of
12413 the destination vector.
12415 insn elems variant constant (binary)
12416 ---- ----- ------- -----------------
12417 vmov i32 0 00000000 00000000 00000000 abcdefgh
12418 vmov i32 1 00000000 00000000 abcdefgh 00000000
12419 vmov i32 2 00000000 abcdefgh 00000000 00000000
12420 vmov i32 3 abcdefgh 00000000 00000000 00000000
12421 vmov i16 4 00000000 abcdefgh
12422 vmov i16 5 abcdefgh 00000000
12423 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12424 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12425 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12426 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12427 vmvn i16 10 00000000 abcdefgh
12428 vmvn i16 11 abcdefgh 00000000
12429 vmov i32 12 00000000 00000000 abcdefgh 11111111
12430 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12431 vmov i32 14 00000000 abcdefgh 11111111 11111111
12432 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12433 vmov i8 16 abcdefgh
12434 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12435 eeeeeeee ffffffff gggggggg hhhhhhhh
12436 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12437 vmov f32 19 00000000 00000000 00000000 00000000
12439 For case 18, B = !b. Representable values are exactly those accepted by
12440 vfp3_const_double_index, but are output as floating-point numbers rather
12441 than indices.
12443 For case 19, we will change it to vmov.i32 when assembling.
12445 Variants 0-5 (inclusive) may also be used as immediates for the second
12446 operand of VORR/VBIC instructions.
12448 The INVERSE argument causes the bitwise inverse of the given operand to be
12449 recognized instead (used for recognizing legal immediates for the VAND/VORN
12450 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12451 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12452 output, rather than the real insns vbic/vorr).
12454 INVERSE makes no difference to the recognition of float vectors.
12456 The return value is the variant of immediate as shown in the above table, or
12457 -1 if the given value doesn't match any of the listed patterns.
12459 static int
12460 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12461 rtx *modconst, int *elementwidth)
12463 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12464 matches = 1; \
12465 for (i = 0; i < idx; i += (STRIDE)) \
12466 if (!(TEST)) \
12467 matches = 0; \
12468 if (matches) \
12470 immtype = (CLASS); \
12471 elsize = (ELSIZE); \
12472 break; \
12475 unsigned int i, elsize = 0, idx = 0, n_elts;
12476 unsigned int innersize;
12477 unsigned char bytes[16];
12478 int immtype = -1, matches;
12479 unsigned int invmask = inverse ? 0xff : 0;
12480 bool vector = GET_CODE (op) == CONST_VECTOR;
12482 if (vector)
12483 n_elts = CONST_VECTOR_NUNITS (op);
12484 else
12486 n_elts = 1;
12487 if (mode == VOIDmode)
12488 mode = DImode;
12491 innersize = GET_MODE_UNIT_SIZE (mode);
12493 /* Vectors of float constants. */
12494 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12496 rtx el0 = CONST_VECTOR_ELT (op, 0);
12498 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12499 return -1;
12501 /* FP16 vectors cannot be represented. */
12502 if (GET_MODE_INNER (mode) == HFmode)
12503 return -1;
12505 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12506 are distinct in this context. */
12507 if (!const_vec_duplicate_p (op))
12508 return -1;
12510 if (modconst)
12511 *modconst = CONST_VECTOR_ELT (op, 0);
12513 if (elementwidth)
12514 *elementwidth = 0;
12516 if (el0 == CONST0_RTX (GET_MODE (el0)))
12517 return 19;
12518 else
12519 return 18;
12522 /* Splat vector constant out into a byte vector. */
12523 for (i = 0; i < n_elts; i++)
12525 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12526 unsigned HOST_WIDE_INT elpart;
12528 gcc_assert (CONST_INT_P (el));
12529 elpart = INTVAL (el);
12531 for (unsigned int byte = 0; byte < innersize; byte++)
12533 bytes[idx++] = (elpart & 0xff) ^ invmask;
12534 elpart >>= BITS_PER_UNIT;
12538 /* Sanity check. */
12539 gcc_assert (idx == GET_MODE_SIZE (mode));
12543 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12544 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12546 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12547 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12549 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12550 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12552 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12553 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12555 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12557 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12559 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12560 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12562 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12563 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12565 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12566 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12568 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12569 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12571 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12573 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12575 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12576 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12578 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12579 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12581 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12582 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12584 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12585 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12587 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12589 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12590 && bytes[i] == bytes[(i + 8) % idx]);
12592 while (0);
12594 if (immtype == -1)
12595 return -1;
12597 if (elementwidth)
12598 *elementwidth = elsize;
12600 if (modconst)
12602 unsigned HOST_WIDE_INT imm = 0;
12604 /* Un-invert bytes of recognized vector, if necessary. */
12605 if (invmask != 0)
12606 for (i = 0; i < idx; i++)
12607 bytes[i] ^= invmask;
12609 if (immtype == 17)
12611 /* FIXME: Broken on 32-bit H_W_I hosts. */
12612 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12614 for (i = 0; i < 8; i++)
12615 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12616 << (i * BITS_PER_UNIT);
12618 *modconst = GEN_INT (imm);
12620 else
12622 unsigned HOST_WIDE_INT imm = 0;
12624 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12625 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12627 *modconst = GEN_INT (imm);
12631 return immtype;
12632 #undef CHECK
12635 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12636 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12637 float elements), and a modified constant (whatever should be output for a
12638 VMOV) in *MODCONST. */
12641 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12642 rtx *modconst, int *elementwidth)
12644 rtx tmpconst;
12645 int tmpwidth;
12646 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12648 if (retval == -1)
12649 return 0;
12651 if (modconst)
12652 *modconst = tmpconst;
12654 if (elementwidth)
12655 *elementwidth = tmpwidth;
12657 return 1;
12660 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12661 the immediate is valid, write a constant suitable for using as an operand
12662 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12663 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12666 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12667 rtx *modconst, int *elementwidth)
12669 rtx tmpconst;
12670 int tmpwidth;
12671 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12673 if (retval < 0 || retval > 5)
12674 return 0;
12676 if (modconst)
12677 *modconst = tmpconst;
12679 if (elementwidth)
12680 *elementwidth = tmpwidth;
12682 return 1;
12685 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12686 the immediate is valid, write a constant suitable for using as an operand
12687 to VSHR/VSHL to *MODCONST and the corresponding element width to
12688 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12689 because they have different limitations. */
12692 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12693 rtx *modconst, int *elementwidth,
12694 bool isleftshift)
12696 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12697 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12698 unsigned HOST_WIDE_INT last_elt = 0;
12699 unsigned HOST_WIDE_INT maxshift;
12701 /* Split vector constant out into a byte vector. */
12702 for (i = 0; i < n_elts; i++)
12704 rtx el = CONST_VECTOR_ELT (op, i);
12705 unsigned HOST_WIDE_INT elpart;
12707 if (CONST_INT_P (el))
12708 elpart = INTVAL (el);
12709 else if (CONST_DOUBLE_P (el))
12710 return 0;
12711 else
12712 gcc_unreachable ();
12714 if (i != 0 && elpart != last_elt)
12715 return 0;
12717 last_elt = elpart;
12720 /* Shift less than element size. */
12721 maxshift = innersize * 8;
12723 if (isleftshift)
12725 /* Left shift immediate value can be from 0 to <size>-1. */
12726 if (last_elt >= maxshift)
12727 return 0;
12729 else
12731 /* Right shift immediate value can be from 1 to <size>. */
12732 if (last_elt == 0 || last_elt > maxshift)
12733 return 0;
12736 if (elementwidth)
12737 *elementwidth = innersize * 8;
12739 if (modconst)
12740 *modconst = CONST_VECTOR_ELT (op, 0);
12742 return 1;
12745 /* Return a string suitable for output of Neon immediate logic operation
12746 MNEM. */
12748 char *
12749 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12750 int inverse, int quad)
12752 int width, is_valid;
12753 static char templ[40];
12755 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12757 gcc_assert (is_valid != 0);
12759 if (quad)
12760 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12761 else
12762 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12764 return templ;
12767 /* Return a string suitable for output of Neon immediate shift operation
12768 (VSHR or VSHL) MNEM. */
12770 char *
12771 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12772 machine_mode mode, int quad,
12773 bool isleftshift)
12775 int width, is_valid;
12776 static char templ[40];
12778 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12779 gcc_assert (is_valid != 0);
12781 if (quad)
12782 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12783 else
12784 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12786 return templ;
12789 /* Output a sequence of pairwise operations to implement a reduction.
12790 NOTE: We do "too much work" here, because pairwise operations work on two
12791 registers-worth of operands in one go. Unfortunately we can't exploit those
12792 extra calculations to do the full operation in fewer steps, I don't think.
12793 Although all vector elements of the result but the first are ignored, we
12794 actually calculate the same result in each of the elements. An alternative
12795 such as initially loading a vector with zero to use as each of the second
12796 operands would use up an additional register and take an extra instruction,
12797 for no particular gain. */
12799 void
12800 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12801 rtx (*reduc) (rtx, rtx, rtx))
12803 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12804 rtx tmpsum = op1;
12806 for (i = parts / 2; i >= 1; i /= 2)
12808 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12809 emit_insn (reduc (dest, tmpsum, tmpsum));
12810 tmpsum = dest;
12814 /* If VALS is a vector constant that can be loaded into a register
12815 using VDUP, generate instructions to do so and return an RTX to
12816 assign to the register. Otherwise return NULL_RTX. */
12818 static rtx
12819 neon_vdup_constant (rtx vals)
12821 machine_mode mode = GET_MODE (vals);
12822 machine_mode inner_mode = GET_MODE_INNER (mode);
12823 rtx x;
12825 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12826 return NULL_RTX;
12828 if (!const_vec_duplicate_p (vals, &x))
12829 /* The elements are not all the same. We could handle repeating
12830 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12831 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12832 vdup.i16). */
12833 return NULL_RTX;
12835 /* We can load this constant by using VDUP and a constant in a
12836 single ARM register. This will be cheaper than a vector
12837 load. */
12839 x = copy_to_mode_reg (inner_mode, x);
12840 return gen_rtx_VEC_DUPLICATE (mode, x);
12843 /* Generate code to load VALS, which is a PARALLEL containing only
12844 constants (for vec_init) or CONST_VECTOR, efficiently into a
12845 register. Returns an RTX to copy into the register, or NULL_RTX
12846 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12849 neon_make_constant (rtx vals)
12851 machine_mode mode = GET_MODE (vals);
12852 rtx target;
12853 rtx const_vec = NULL_RTX;
12854 int n_elts = GET_MODE_NUNITS (mode);
12855 int n_const = 0;
12856 int i;
12858 if (GET_CODE (vals) == CONST_VECTOR)
12859 const_vec = vals;
12860 else if (GET_CODE (vals) == PARALLEL)
12862 /* A CONST_VECTOR must contain only CONST_INTs and
12863 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12864 Only store valid constants in a CONST_VECTOR. */
12865 for (i = 0; i < n_elts; ++i)
12867 rtx x = XVECEXP (vals, 0, i);
12868 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12869 n_const++;
12871 if (n_const == n_elts)
12872 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12874 else
12875 gcc_unreachable ();
12877 if (const_vec != NULL
12878 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12879 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12880 return const_vec;
12881 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12882 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12883 pipeline cycle; creating the constant takes one or two ARM
12884 pipeline cycles. */
12885 return target;
12886 else if (const_vec != NULL_RTX)
12887 /* Load from constant pool. On Cortex-A8 this takes two cycles
12888 (for either double or quad vectors). We can not take advantage
12889 of single-cycle VLD1 because we need a PC-relative addressing
12890 mode. */
12891 return const_vec;
12892 else
12893 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12894 We can not construct an initializer. */
12895 return NULL_RTX;
12898 /* Initialize vector TARGET to VALS. */
12900 void
12901 neon_expand_vector_init (rtx target, rtx vals)
12903 machine_mode mode = GET_MODE (target);
12904 machine_mode inner_mode = GET_MODE_INNER (mode);
12905 int n_elts = GET_MODE_NUNITS (mode);
12906 int n_var = 0, one_var = -1;
12907 bool all_same = true;
12908 rtx x, mem;
12909 int i;
12911 for (i = 0; i < n_elts; ++i)
12913 x = XVECEXP (vals, 0, i);
12914 if (!CONSTANT_P (x))
12915 ++n_var, one_var = i;
12917 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12918 all_same = false;
12921 if (n_var == 0)
12923 rtx constant = neon_make_constant (vals);
12924 if (constant != NULL_RTX)
12926 emit_move_insn (target, constant);
12927 return;
12931 /* Splat a single non-constant element if we can. */
12932 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12934 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12935 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12936 return;
12939 /* One field is non-constant. Load constant then overwrite varying
12940 field. This is more efficient than using the stack. */
12941 if (n_var == 1)
12943 rtx copy = copy_rtx (vals);
12944 rtx index = GEN_INT (one_var);
12946 /* Load constant part of vector, substitute neighboring value for
12947 varying element. */
12948 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12949 neon_expand_vector_init (target, copy);
12951 /* Insert variable. */
12952 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12953 switch (mode)
12955 case V8QImode:
12956 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12957 break;
12958 case V16QImode:
12959 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12960 break;
12961 case V4HImode:
12962 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12963 break;
12964 case V8HImode:
12965 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12966 break;
12967 case V2SImode:
12968 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12969 break;
12970 case V4SImode:
12971 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12972 break;
12973 case V2SFmode:
12974 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12975 break;
12976 case V4SFmode:
12977 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12978 break;
12979 case V2DImode:
12980 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12981 break;
12982 default:
12983 gcc_unreachable ();
12985 return;
12988 /* Construct the vector in memory one field at a time
12989 and load the whole vector. */
12990 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12991 for (i = 0; i < n_elts; i++)
12992 emit_move_insn (adjust_address_nv (mem, inner_mode,
12993 i * GET_MODE_SIZE (inner_mode)),
12994 XVECEXP (vals, 0, i));
12995 emit_move_insn (target, mem);
12998 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12999 ERR if it doesn't. EXP indicates the source location, which includes the
13000 inlining history for intrinsics. */
13002 static void
13003 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13004 const_tree exp, const char *desc)
13006 HOST_WIDE_INT lane;
13008 gcc_assert (CONST_INT_P (operand));
13010 lane = INTVAL (operand);
13012 if (lane < low || lane >= high)
13014 if (exp)
13015 error ("%K%s %wd out of range %wd - %wd",
13016 exp, desc, lane, low, high - 1);
13017 else
13018 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
13022 /* Bounds-check lanes. */
13024 void
13025 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
13026 const_tree exp)
13028 bounds_check (operand, low, high, exp, "lane");
13031 /* Bounds-check constants. */
13033 void
13034 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
13036 bounds_check (operand, low, high, NULL_TREE, "constant");
13039 HOST_WIDE_INT
13040 neon_element_bits (machine_mode mode)
13042 return GET_MODE_UNIT_BITSIZE (mode);
13046 /* Predicates for `match_operand' and `match_operator'. */
13048 /* Return TRUE if OP is a valid coprocessor memory address pattern.
13049 WB is true if full writeback address modes are allowed and is false
13050 if limited writeback address modes (POST_INC and PRE_DEC) are
13051 allowed. */
13054 arm_coproc_mem_operand (rtx op, bool wb)
13056 rtx ind;
13058 /* Reject eliminable registers. */
13059 if (! (reload_in_progress || reload_completed || lra_in_progress)
13060 && ( reg_mentioned_p (frame_pointer_rtx, op)
13061 || reg_mentioned_p (arg_pointer_rtx, op)
13062 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13063 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13064 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13065 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13066 return FALSE;
13068 /* Constants are converted into offsets from labels. */
13069 if (!MEM_P (op))
13070 return FALSE;
13072 ind = XEXP (op, 0);
13074 if (reload_completed
13075 && (GET_CODE (ind) == LABEL_REF
13076 || (GET_CODE (ind) == CONST
13077 && GET_CODE (XEXP (ind, 0)) == PLUS
13078 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13079 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13080 return TRUE;
13082 /* Match: (mem (reg)). */
13083 if (REG_P (ind))
13084 return arm_address_register_rtx_p (ind, 0);
13086 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
13087 acceptable in any case (subject to verification by
13088 arm_address_register_rtx_p). We need WB to be true to accept
13089 PRE_INC and POST_DEC. */
13090 if (GET_CODE (ind) == POST_INC
13091 || GET_CODE (ind) == PRE_DEC
13092 || (wb
13093 && (GET_CODE (ind) == PRE_INC
13094 || GET_CODE (ind) == POST_DEC)))
13095 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13097 if (wb
13098 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
13099 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
13100 && GET_CODE (XEXP (ind, 1)) == PLUS
13101 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
13102 ind = XEXP (ind, 1);
13104 /* Match:
13105 (plus (reg)
13106 (const)). */
13107 if (GET_CODE (ind) == PLUS
13108 && REG_P (XEXP (ind, 0))
13109 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13110 && CONST_INT_P (XEXP (ind, 1))
13111 && INTVAL (XEXP (ind, 1)) > -1024
13112 && INTVAL (XEXP (ind, 1)) < 1024
13113 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13114 return TRUE;
13116 return FALSE;
13119 /* Return TRUE if OP is a memory operand which we can load or store a vector
13120 to/from. TYPE is one of the following values:
13121 0 - Vector load/stor (vldr)
13122 1 - Core registers (ldm)
13123 2 - Element/structure loads (vld1)
13126 neon_vector_mem_operand (rtx op, int type, bool strict)
13128 rtx ind;
13130 /* Reject eliminable registers. */
13131 if (strict && ! (reload_in_progress || reload_completed)
13132 && (reg_mentioned_p (frame_pointer_rtx, op)
13133 || reg_mentioned_p (arg_pointer_rtx, op)
13134 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13135 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13136 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13137 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13138 return FALSE;
13140 /* Constants are converted into offsets from labels. */
13141 if (!MEM_P (op))
13142 return FALSE;
13144 ind = XEXP (op, 0);
13146 if (reload_completed
13147 && (GET_CODE (ind) == LABEL_REF
13148 || (GET_CODE (ind) == CONST
13149 && GET_CODE (XEXP (ind, 0)) == PLUS
13150 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13151 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13152 return TRUE;
13154 /* Match: (mem (reg)). */
13155 if (REG_P (ind))
13156 return arm_address_register_rtx_p (ind, 0);
13158 /* Allow post-increment with Neon registers. */
13159 if ((type != 1 && GET_CODE (ind) == POST_INC)
13160 || (type == 0 && GET_CODE (ind) == PRE_DEC))
13161 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13163 /* Allow post-increment by register for VLDn */
13164 if (type == 2 && GET_CODE (ind) == POST_MODIFY
13165 && GET_CODE (XEXP (ind, 1)) == PLUS
13166 && REG_P (XEXP (XEXP (ind, 1), 1)))
13167 return true;
13169 /* Match:
13170 (plus (reg)
13171 (const)). */
13172 if (type == 0
13173 && GET_CODE (ind) == PLUS
13174 && REG_P (XEXP (ind, 0))
13175 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13176 && CONST_INT_P (XEXP (ind, 1))
13177 && INTVAL (XEXP (ind, 1)) > -1024
13178 /* For quad modes, we restrict the constant offset to be slightly less
13179 than what the instruction format permits. We have no such constraint
13180 on double mode offsets. (This must match arm_legitimate_index_p.) */
13181 && (INTVAL (XEXP (ind, 1))
13182 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13183 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13184 return TRUE;
13186 return FALSE;
13189 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13190 type. */
13192 neon_struct_mem_operand (rtx op)
13194 rtx ind;
13196 /* Reject eliminable registers. */
13197 if (! (reload_in_progress || reload_completed)
13198 && ( reg_mentioned_p (frame_pointer_rtx, op)
13199 || reg_mentioned_p (arg_pointer_rtx, op)
13200 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13201 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13202 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13203 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13204 return FALSE;
13206 /* Constants are converted into offsets from labels. */
13207 if (!MEM_P (op))
13208 return FALSE;
13210 ind = XEXP (op, 0);
13212 if (reload_completed
13213 && (GET_CODE (ind) == LABEL_REF
13214 || (GET_CODE (ind) == CONST
13215 && GET_CODE (XEXP (ind, 0)) == PLUS
13216 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13217 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13218 return TRUE;
13220 /* Match: (mem (reg)). */
13221 if (REG_P (ind))
13222 return arm_address_register_rtx_p (ind, 0);
13224 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13225 if (GET_CODE (ind) == POST_INC
13226 || GET_CODE (ind) == PRE_DEC)
13227 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13229 return FALSE;
13232 /* Return true if X is a register that will be eliminated later on. */
13234 arm_eliminable_register (rtx x)
13236 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13237 || REGNO (x) == ARG_POINTER_REGNUM
13238 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13239 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13242 /* Return GENERAL_REGS if a scratch register required to reload x to/from
13243 coprocessor registers. Otherwise return NO_REGS. */
13245 enum reg_class
13246 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13248 if (mode == HFmode)
13250 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
13251 return GENERAL_REGS;
13252 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13253 return NO_REGS;
13254 return GENERAL_REGS;
13257 /* The neon move patterns handle all legitimate vector and struct
13258 addresses. */
13259 if (TARGET_NEON
13260 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13261 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13262 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13263 || VALID_NEON_STRUCT_MODE (mode)))
13264 return NO_REGS;
13266 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13267 return NO_REGS;
13269 return GENERAL_REGS;
13272 /* Values which must be returned in the most-significant end of the return
13273 register. */
13275 static bool
13276 arm_return_in_msb (const_tree valtype)
13278 return (TARGET_AAPCS_BASED
13279 && BYTES_BIG_ENDIAN
13280 && (AGGREGATE_TYPE_P (valtype)
13281 || TREE_CODE (valtype) == COMPLEX_TYPE
13282 || FIXED_POINT_TYPE_P (valtype)));
13285 /* Return TRUE if X references a SYMBOL_REF. */
13287 symbol_mentioned_p (rtx x)
13289 const char * fmt;
13290 int i;
13292 if (GET_CODE (x) == SYMBOL_REF)
13293 return 1;
13295 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13296 are constant offsets, not symbols. */
13297 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13298 return 0;
13300 fmt = GET_RTX_FORMAT (GET_CODE (x));
13302 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13304 if (fmt[i] == 'E')
13306 int j;
13308 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13309 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13310 return 1;
13312 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13313 return 1;
13316 return 0;
13319 /* Return TRUE if X references a LABEL_REF. */
13321 label_mentioned_p (rtx x)
13323 const char * fmt;
13324 int i;
13326 if (GET_CODE (x) == LABEL_REF)
13327 return 1;
13329 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13330 instruction, but they are constant offsets, not symbols. */
13331 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13332 return 0;
13334 fmt = GET_RTX_FORMAT (GET_CODE (x));
13335 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13337 if (fmt[i] == 'E')
13339 int j;
13341 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13342 if (label_mentioned_p (XVECEXP (x, i, j)))
13343 return 1;
13345 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13346 return 1;
13349 return 0;
13353 tls_mentioned_p (rtx x)
13355 switch (GET_CODE (x))
13357 case CONST:
13358 return tls_mentioned_p (XEXP (x, 0));
13360 case UNSPEC:
13361 if (XINT (x, 1) == UNSPEC_TLS)
13362 return 1;
13364 /* Fall through. */
13365 default:
13366 return 0;
13370 /* Must not copy any rtx that uses a pc-relative address.
13371 Also, disallow copying of load-exclusive instructions that
13372 may appear after splitting of compare-and-swap-style operations
13373 so as to prevent those loops from being transformed away from their
13374 canonical forms (see PR 69904). */
13376 static bool
13377 arm_cannot_copy_insn_p (rtx_insn *insn)
13379 /* The tls call insn cannot be copied, as it is paired with a data
13380 word. */
13381 if (recog_memoized (insn) == CODE_FOR_tlscall)
13382 return true;
13384 subrtx_iterator::array_type array;
13385 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13387 const_rtx x = *iter;
13388 if (GET_CODE (x) == UNSPEC
13389 && (XINT (x, 1) == UNSPEC_PIC_BASE
13390 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13391 return true;
13394 rtx set = single_set (insn);
13395 if (set)
13397 rtx src = SET_SRC (set);
13398 if (GET_CODE (src) == ZERO_EXTEND)
13399 src = XEXP (src, 0);
13401 /* Catch the load-exclusive and load-acquire operations. */
13402 if (GET_CODE (src) == UNSPEC_VOLATILE
13403 && (XINT (src, 1) == VUNSPEC_LL
13404 || XINT (src, 1) == VUNSPEC_LAX))
13405 return true;
13407 return false;
13410 enum rtx_code
13411 minmax_code (rtx x)
13413 enum rtx_code code = GET_CODE (x);
13415 switch (code)
13417 case SMAX:
13418 return GE;
13419 case SMIN:
13420 return LE;
13421 case UMIN:
13422 return LEU;
13423 case UMAX:
13424 return GEU;
13425 default:
13426 gcc_unreachable ();
13430 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13432 bool
13433 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13434 int *mask, bool *signed_sat)
13436 /* The high bound must be a power of two minus one. */
13437 int log = exact_log2 (INTVAL (hi_bound) + 1);
13438 if (log == -1)
13439 return false;
13441 /* The low bound is either zero (for usat) or one less than the
13442 negation of the high bound (for ssat). */
13443 if (INTVAL (lo_bound) == 0)
13445 if (mask)
13446 *mask = log;
13447 if (signed_sat)
13448 *signed_sat = false;
13450 return true;
13453 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13455 if (mask)
13456 *mask = log + 1;
13457 if (signed_sat)
13458 *signed_sat = true;
13460 return true;
13463 return false;
13466 /* Return 1 if memory locations are adjacent. */
13468 adjacent_mem_locations (rtx a, rtx b)
13470 /* We don't guarantee to preserve the order of these memory refs. */
13471 if (volatile_refs_p (a) || volatile_refs_p (b))
13472 return 0;
13474 if ((REG_P (XEXP (a, 0))
13475 || (GET_CODE (XEXP (a, 0)) == PLUS
13476 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13477 && (REG_P (XEXP (b, 0))
13478 || (GET_CODE (XEXP (b, 0)) == PLUS
13479 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13481 HOST_WIDE_INT val0 = 0, val1 = 0;
13482 rtx reg0, reg1;
13483 int val_diff;
13485 if (GET_CODE (XEXP (a, 0)) == PLUS)
13487 reg0 = XEXP (XEXP (a, 0), 0);
13488 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13490 else
13491 reg0 = XEXP (a, 0);
13493 if (GET_CODE (XEXP (b, 0)) == PLUS)
13495 reg1 = XEXP (XEXP (b, 0), 0);
13496 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13498 else
13499 reg1 = XEXP (b, 0);
13501 /* Don't accept any offset that will require multiple
13502 instructions to handle, since this would cause the
13503 arith_adjacentmem pattern to output an overlong sequence. */
13504 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13505 return 0;
13507 /* Don't allow an eliminable register: register elimination can make
13508 the offset too large. */
13509 if (arm_eliminable_register (reg0))
13510 return 0;
13512 val_diff = val1 - val0;
13514 if (arm_ld_sched)
13516 /* If the target has load delay slots, then there's no benefit
13517 to using an ldm instruction unless the offset is zero and
13518 we are optimizing for size. */
13519 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13520 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13521 && (val_diff == 4 || val_diff == -4));
13524 return ((REGNO (reg0) == REGNO (reg1))
13525 && (val_diff == 4 || val_diff == -4));
13528 return 0;
13531 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13532 for load operations, false for store operations. CONSECUTIVE is true
13533 if the register numbers in the operation must be consecutive in the register
13534 bank. RETURN_PC is true if value is to be loaded in PC.
13535 The pattern we are trying to match for load is:
13536 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13537 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13540 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13542 where
13543 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13544 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13545 3. If consecutive is TRUE, then for kth register being loaded,
13546 REGNO (R_dk) = REGNO (R_d0) + k.
13547 The pattern for store is similar. */
13548 bool
13549 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13550 bool consecutive, bool return_pc)
13552 HOST_WIDE_INT count = XVECLEN (op, 0);
13553 rtx reg, mem, addr;
13554 unsigned regno;
13555 unsigned first_regno;
13556 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13557 rtx elt;
13558 bool addr_reg_in_reglist = false;
13559 bool update = false;
13560 int reg_increment;
13561 int offset_adj;
13562 int regs_per_val;
13564 /* If not in SImode, then registers must be consecutive
13565 (e.g., VLDM instructions for DFmode). */
13566 gcc_assert ((mode == SImode) || consecutive);
13567 /* Setting return_pc for stores is illegal. */
13568 gcc_assert (!return_pc || load);
13570 /* Set up the increments and the regs per val based on the mode. */
13571 reg_increment = GET_MODE_SIZE (mode);
13572 regs_per_val = reg_increment / 4;
13573 offset_adj = return_pc ? 1 : 0;
13575 if (count <= 1
13576 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13577 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13578 return false;
13580 /* Check if this is a write-back. */
13581 elt = XVECEXP (op, 0, offset_adj);
13582 if (GET_CODE (SET_SRC (elt)) == PLUS)
13584 i++;
13585 base = 1;
13586 update = true;
13588 /* The offset adjustment must be the number of registers being
13589 popped times the size of a single register. */
13590 if (!REG_P (SET_DEST (elt))
13591 || !REG_P (XEXP (SET_SRC (elt), 0))
13592 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13593 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13594 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13595 ((count - 1 - offset_adj) * reg_increment))
13596 return false;
13599 i = i + offset_adj;
13600 base = base + offset_adj;
13601 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13602 success depends on the type: VLDM can do just one reg,
13603 LDM must do at least two. */
13604 if ((count <= i) && (mode == SImode))
13605 return false;
13607 elt = XVECEXP (op, 0, i - 1);
13608 if (GET_CODE (elt) != SET)
13609 return false;
13611 if (load)
13613 reg = SET_DEST (elt);
13614 mem = SET_SRC (elt);
13616 else
13618 reg = SET_SRC (elt);
13619 mem = SET_DEST (elt);
13622 if (!REG_P (reg) || !MEM_P (mem))
13623 return false;
13625 regno = REGNO (reg);
13626 first_regno = regno;
13627 addr = XEXP (mem, 0);
13628 if (GET_CODE (addr) == PLUS)
13630 if (!CONST_INT_P (XEXP (addr, 1)))
13631 return false;
13633 offset = INTVAL (XEXP (addr, 1));
13634 addr = XEXP (addr, 0);
13637 if (!REG_P (addr))
13638 return false;
13640 /* Don't allow SP to be loaded unless it is also the base register. It
13641 guarantees that SP is reset correctly when an LDM instruction
13642 is interrupted. Otherwise, we might end up with a corrupt stack. */
13643 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13644 return false;
13646 for (; i < count; i++)
13648 elt = XVECEXP (op, 0, i);
13649 if (GET_CODE (elt) != SET)
13650 return false;
13652 if (load)
13654 reg = SET_DEST (elt);
13655 mem = SET_SRC (elt);
13657 else
13659 reg = SET_SRC (elt);
13660 mem = SET_DEST (elt);
13663 if (!REG_P (reg)
13664 || GET_MODE (reg) != mode
13665 || REGNO (reg) <= regno
13666 || (consecutive
13667 && (REGNO (reg) !=
13668 (unsigned int) (first_regno + regs_per_val * (i - base))))
13669 /* Don't allow SP to be loaded unless it is also the base register. It
13670 guarantees that SP is reset correctly when an LDM instruction
13671 is interrupted. Otherwise, we might end up with a corrupt stack. */
13672 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13673 || !MEM_P (mem)
13674 || GET_MODE (mem) != mode
13675 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13676 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13677 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13678 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13679 offset + (i - base) * reg_increment))
13680 && (!REG_P (XEXP (mem, 0))
13681 || offset + (i - base) * reg_increment != 0)))
13682 return false;
13684 regno = REGNO (reg);
13685 if (regno == REGNO (addr))
13686 addr_reg_in_reglist = true;
13689 if (load)
13691 if (update && addr_reg_in_reglist)
13692 return false;
13694 /* For Thumb-1, address register is always modified - either by write-back
13695 or by explicit load. If the pattern does not describe an update,
13696 then the address register must be in the list of loaded registers. */
13697 if (TARGET_THUMB1)
13698 return update || addr_reg_in_reglist;
13701 return true;
13704 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13705 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13706 instruction. ADD_OFFSET is nonzero if the base address register needs
13707 to be modified with an add instruction before we can use it. */
13709 static bool
13710 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13711 int nops, HOST_WIDE_INT add_offset)
13713 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13714 if the offset isn't small enough. The reason 2 ldrs are faster
13715 is because these ARMs are able to do more than one cache access
13716 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13717 whilst the ARM8 has a double bandwidth cache. This means that
13718 these cores can do both an instruction fetch and a data fetch in
13719 a single cycle, so the trick of calculating the address into a
13720 scratch register (one of the result regs) and then doing a load
13721 multiple actually becomes slower (and no smaller in code size).
13722 That is the transformation
13724 ldr rd1, [rbase + offset]
13725 ldr rd2, [rbase + offset + 4]
13729 add rd1, rbase, offset
13730 ldmia rd1, {rd1, rd2}
13732 produces worse code -- '3 cycles + any stalls on rd2' instead of
13733 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13734 access per cycle, the first sequence could never complete in less
13735 than 6 cycles, whereas the ldm sequence would only take 5 and
13736 would make better use of sequential accesses if not hitting the
13737 cache.
13739 We cheat here and test 'arm_ld_sched' which we currently know to
13740 only be true for the ARM8, ARM9 and StrongARM. If this ever
13741 changes, then the test below needs to be reworked. */
13742 if (nops == 2 && arm_ld_sched && add_offset != 0)
13743 return false;
13745 /* XScale has load-store double instructions, but they have stricter
13746 alignment requirements than load-store multiple, so we cannot
13747 use them.
13749 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13750 the pipeline until completion.
13752 NREGS CYCLES
13758 An ldr instruction takes 1-3 cycles, but does not block the
13759 pipeline.
13761 NREGS CYCLES
13762 1 1-3
13763 2 2-6
13764 3 3-9
13765 4 4-12
13767 Best case ldr will always win. However, the more ldr instructions
13768 we issue, the less likely we are to be able to schedule them well.
13769 Using ldr instructions also increases code size.
13771 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13772 for counts of 3 or 4 regs. */
13773 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13774 return false;
13775 return true;
13778 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13779 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13780 an array ORDER which describes the sequence to use when accessing the
13781 offsets that produces an ascending order. In this sequence, each
13782 offset must be larger by exactly 4 than the previous one. ORDER[0]
13783 must have been filled in with the lowest offset by the caller.
13784 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13785 we use to verify that ORDER produces an ascending order of registers.
13786 Return true if it was possible to construct such an order, false if
13787 not. */
13789 static bool
13790 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13791 int *unsorted_regs)
13793 int i;
13794 for (i = 1; i < nops; i++)
13796 int j;
13798 order[i] = order[i - 1];
13799 for (j = 0; j < nops; j++)
13800 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13802 /* We must find exactly one offset that is higher than the
13803 previous one by 4. */
13804 if (order[i] != order[i - 1])
13805 return false;
13806 order[i] = j;
13808 if (order[i] == order[i - 1])
13809 return false;
13810 /* The register numbers must be ascending. */
13811 if (unsorted_regs != NULL
13812 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13813 return false;
13815 return true;
13818 /* Used to determine in a peephole whether a sequence of load
13819 instructions can be changed into a load-multiple instruction.
13820 NOPS is the number of separate load instructions we are examining. The
13821 first NOPS entries in OPERANDS are the destination registers, the
13822 next NOPS entries are memory operands. If this function is
13823 successful, *BASE is set to the common base register of the memory
13824 accesses; *LOAD_OFFSET is set to the first memory location's offset
13825 from that base register.
13826 REGS is an array filled in with the destination register numbers.
13827 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
13828 insn numbers to an ascending order of stores. If CHECK_REGS is true,
13829 the sequence of registers in REGS matches the loads from ascending memory
13830 locations, and the function verifies that the register numbers are
13831 themselves ascending. If CHECK_REGS is false, the register numbers
13832 are stored in the order they are found in the operands. */
13833 static int
13834 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13835 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13837 int unsorted_regs[MAX_LDM_STM_OPS];
13838 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13839 int order[MAX_LDM_STM_OPS];
13840 rtx base_reg_rtx = NULL;
13841 int base_reg = -1;
13842 int i, ldm_case;
13844 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13845 easily extended if required. */
13846 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13848 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13850 /* Loop over the operands and check that the memory references are
13851 suitable (i.e. immediate offsets from the same base register). At
13852 the same time, extract the target register, and the memory
13853 offsets. */
13854 for (i = 0; i < nops; i++)
13856 rtx reg;
13857 rtx offset;
13859 /* Convert a subreg of a mem into the mem itself. */
13860 if (GET_CODE (operands[nops + i]) == SUBREG)
13861 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13863 gcc_assert (MEM_P (operands[nops + i]));
13865 /* Don't reorder volatile memory references; it doesn't seem worth
13866 looking for the case where the order is ok anyway. */
13867 if (MEM_VOLATILE_P (operands[nops + i]))
13868 return 0;
13870 offset = const0_rtx;
13872 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13873 || (GET_CODE (reg) == SUBREG
13874 && REG_P (reg = SUBREG_REG (reg))))
13875 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13876 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13877 || (GET_CODE (reg) == SUBREG
13878 && REG_P (reg = SUBREG_REG (reg))))
13879 && (CONST_INT_P (offset
13880 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13882 if (i == 0)
13884 base_reg = REGNO (reg);
13885 base_reg_rtx = reg;
13886 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13887 return 0;
13889 else if (base_reg != (int) REGNO (reg))
13890 /* Not addressed from the same base register. */
13891 return 0;
13893 unsorted_regs[i] = (REG_P (operands[i])
13894 ? REGNO (operands[i])
13895 : REGNO (SUBREG_REG (operands[i])));
13897 /* If it isn't an integer register, or if it overwrites the
13898 base register but isn't the last insn in the list, then
13899 we can't do this. */
13900 if (unsorted_regs[i] < 0
13901 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13902 || unsorted_regs[i] > 14
13903 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13904 return 0;
13906 /* Don't allow SP to be loaded unless it is also the base
13907 register. It guarantees that SP is reset correctly when
13908 an LDM instruction is interrupted. Otherwise, we might
13909 end up with a corrupt stack. */
13910 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13911 return 0;
13913 unsorted_offsets[i] = INTVAL (offset);
13914 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13915 order[0] = i;
13917 else
13918 /* Not a suitable memory address. */
13919 return 0;
13922 /* All the useful information has now been extracted from the
13923 operands into unsorted_regs and unsorted_offsets; additionally,
13924 order[0] has been set to the lowest offset in the list. Sort
13925 the offsets into order, verifying that they are adjacent, and
13926 check that the register numbers are ascending. */
13927 if (!compute_offset_order (nops, unsorted_offsets, order,
13928 check_regs ? unsorted_regs : NULL))
13929 return 0;
13931 if (saved_order)
13932 memcpy (saved_order, order, sizeof order);
13934 if (base)
13936 *base = base_reg;
13938 for (i = 0; i < nops; i++)
13939 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13941 *load_offset = unsorted_offsets[order[0]];
13944 if (TARGET_THUMB1
13945 && !peep2_reg_dead_p (nops, base_reg_rtx))
13946 return 0;
13948 if (unsorted_offsets[order[0]] == 0)
13949 ldm_case = 1; /* ldmia */
13950 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13951 ldm_case = 2; /* ldmib */
13952 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13953 ldm_case = 3; /* ldmda */
13954 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13955 ldm_case = 4; /* ldmdb */
13956 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13957 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13958 ldm_case = 5;
13959 else
13960 return 0;
13962 if (!multiple_operation_profitable_p (false, nops,
13963 ldm_case == 5
13964 ? unsorted_offsets[order[0]] : 0))
13965 return 0;
13967 return ldm_case;
13970 /* Used to determine in a peephole whether a sequence of store instructions can
13971 be changed into a store-multiple instruction.
13972 NOPS is the number of separate store instructions we are examining.
13973 NOPS_TOTAL is the total number of instructions recognized by the peephole
13974 pattern.
13975 The first NOPS entries in OPERANDS are the source registers, the next
13976 NOPS entries are memory operands. If this function is successful, *BASE is
13977 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13978 to the first memory location's offset from that base register. REGS is an
13979 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13980 likewise filled with the corresponding rtx's.
13981 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
13982 numbers to an ascending order of stores.
13983 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13984 from ascending memory locations, and the function verifies that the register
13985 numbers are themselves ascending. If CHECK_REGS is false, the register
13986 numbers are stored in the order they are found in the operands. */
13987 static int
13988 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13989 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13990 HOST_WIDE_INT *load_offset, bool check_regs)
13992 int unsorted_regs[MAX_LDM_STM_OPS];
13993 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13994 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13995 int order[MAX_LDM_STM_OPS];
13996 int base_reg = -1;
13997 rtx base_reg_rtx = NULL;
13998 int i, stm_case;
14000 /* Write back of base register is currently only supported for Thumb 1. */
14001 int base_writeback = TARGET_THUMB1;
14003 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
14004 easily extended if required. */
14005 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
14007 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
14009 /* Loop over the operands and check that the memory references are
14010 suitable (i.e. immediate offsets from the same base register). At
14011 the same time, extract the target register, and the memory
14012 offsets. */
14013 for (i = 0; i < nops; i++)
14015 rtx reg;
14016 rtx offset;
14018 /* Convert a subreg of a mem into the mem itself. */
14019 if (GET_CODE (operands[nops + i]) == SUBREG)
14020 operands[nops + i] = alter_subreg (operands + (nops + i), true);
14022 gcc_assert (MEM_P (operands[nops + i]));
14024 /* Don't reorder volatile memory references; it doesn't seem worth
14025 looking for the case where the order is ok anyway. */
14026 if (MEM_VOLATILE_P (operands[nops + i]))
14027 return 0;
14029 offset = const0_rtx;
14031 if ((REG_P (reg = XEXP (operands[nops + i], 0))
14032 || (GET_CODE (reg) == SUBREG
14033 && REG_P (reg = SUBREG_REG (reg))))
14034 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
14035 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
14036 || (GET_CODE (reg) == SUBREG
14037 && REG_P (reg = SUBREG_REG (reg))))
14038 && (CONST_INT_P (offset
14039 = XEXP (XEXP (operands[nops + i], 0), 1)))))
14041 unsorted_reg_rtxs[i] = (REG_P (operands[i])
14042 ? operands[i] : SUBREG_REG (operands[i]));
14043 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
14045 if (i == 0)
14047 base_reg = REGNO (reg);
14048 base_reg_rtx = reg;
14049 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
14050 return 0;
14052 else if (base_reg != (int) REGNO (reg))
14053 /* Not addressed from the same base register. */
14054 return 0;
14056 /* If it isn't an integer register, then we can't do this. */
14057 if (unsorted_regs[i] < 0
14058 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
14059 /* The effects are unpredictable if the base register is
14060 both updated and stored. */
14061 || (base_writeback && unsorted_regs[i] == base_reg)
14062 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
14063 || unsorted_regs[i] > 14)
14064 return 0;
14066 unsorted_offsets[i] = INTVAL (offset);
14067 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
14068 order[0] = i;
14070 else
14071 /* Not a suitable memory address. */
14072 return 0;
14075 /* All the useful information has now been extracted from the
14076 operands into unsorted_regs and unsorted_offsets; additionally,
14077 order[0] has been set to the lowest offset in the list. Sort
14078 the offsets into order, verifying that they are adjacent, and
14079 check that the register numbers are ascending. */
14080 if (!compute_offset_order (nops, unsorted_offsets, order,
14081 check_regs ? unsorted_regs : NULL))
14082 return 0;
14084 if (saved_order)
14085 memcpy (saved_order, order, sizeof order);
14087 if (base)
14089 *base = base_reg;
14091 for (i = 0; i < nops; i++)
14093 regs[i] = unsorted_regs[check_regs ? order[i] : i];
14094 if (reg_rtxs)
14095 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
14098 *load_offset = unsorted_offsets[order[0]];
14101 if (TARGET_THUMB1
14102 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
14103 return 0;
14105 if (unsorted_offsets[order[0]] == 0)
14106 stm_case = 1; /* stmia */
14107 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
14108 stm_case = 2; /* stmib */
14109 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
14110 stm_case = 3; /* stmda */
14111 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
14112 stm_case = 4; /* stmdb */
14113 else
14114 return 0;
14116 if (!multiple_operation_profitable_p (false, nops, 0))
14117 return 0;
14119 return stm_case;
14122 /* Routines for use in generating RTL. */
14124 /* Generate a load-multiple instruction. COUNT is the number of loads in
14125 the instruction; REGS and MEMS are arrays containing the operands.
14126 BASEREG is the base register to be used in addressing the memory operands.
14127 WBACK_OFFSET is nonzero if the instruction should update the base
14128 register. */
14130 static rtx
14131 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14132 HOST_WIDE_INT wback_offset)
14134 int i = 0, j;
14135 rtx result;
14137 if (!multiple_operation_profitable_p (false, count, 0))
14139 rtx seq;
14141 start_sequence ();
14143 for (i = 0; i < count; i++)
14144 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
14146 if (wback_offset != 0)
14147 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14149 seq = get_insns ();
14150 end_sequence ();
14152 return seq;
14155 result = gen_rtx_PARALLEL (VOIDmode,
14156 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14157 if (wback_offset != 0)
14159 XVECEXP (result, 0, 0)
14160 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14161 i = 1;
14162 count++;
14165 for (j = 0; i < count; i++, j++)
14166 XVECEXP (result, 0, i)
14167 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
14169 return result;
14172 /* Generate a store-multiple instruction. COUNT is the number of stores in
14173 the instruction; REGS and MEMS are arrays containing the operands.
14174 BASEREG is the base register to be used in addressing the memory operands.
14175 WBACK_OFFSET is nonzero if the instruction should update the base
14176 register. */
14178 static rtx
14179 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
14180 HOST_WIDE_INT wback_offset)
14182 int i = 0, j;
14183 rtx result;
14185 if (GET_CODE (basereg) == PLUS)
14186 basereg = XEXP (basereg, 0);
14188 if (!multiple_operation_profitable_p (false, count, 0))
14190 rtx seq;
14192 start_sequence ();
14194 for (i = 0; i < count; i++)
14195 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14197 if (wback_offset != 0)
14198 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14200 seq = get_insns ();
14201 end_sequence ();
14203 return seq;
14206 result = gen_rtx_PARALLEL (VOIDmode,
14207 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14208 if (wback_offset != 0)
14210 XVECEXP (result, 0, 0)
14211 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14212 i = 1;
14213 count++;
14216 for (j = 0; i < count; i++, j++)
14217 XVECEXP (result, 0, i)
14218 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14220 return result;
14223 /* Generate either a load-multiple or a store-multiple instruction. This
14224 function can be used in situations where we can start with a single MEM
14225 rtx and adjust its address upwards.
14226 COUNT is the number of operations in the instruction, not counting a
14227 possible update of the base register. REGS is an array containing the
14228 register operands.
14229 BASEREG is the base register to be used in addressing the memory operands,
14230 which are constructed from BASEMEM.
14231 WRITE_BACK specifies whether the generated instruction should include an
14232 update of the base register.
14233 OFFSETP is used to pass an offset to and from this function; this offset
14234 is not used when constructing the address (instead BASEMEM should have an
14235 appropriate offset in its address), it is used only for setting
14236 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
14238 static rtx
14239 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14240 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14242 rtx mems[MAX_LDM_STM_OPS];
14243 HOST_WIDE_INT offset = *offsetp;
14244 int i;
14246 gcc_assert (count <= MAX_LDM_STM_OPS);
14248 if (GET_CODE (basereg) == PLUS)
14249 basereg = XEXP (basereg, 0);
14251 for (i = 0; i < count; i++)
14253 rtx addr = plus_constant (Pmode, basereg, i * 4);
14254 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14255 offset += 4;
14258 if (write_back)
14259 *offsetp = offset;
14261 if (is_load)
14262 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14263 write_back ? 4 * count : 0);
14264 else
14265 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14266 write_back ? 4 * count : 0);
14270 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14271 rtx basemem, HOST_WIDE_INT *offsetp)
14273 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14274 offsetp);
14278 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14279 rtx basemem, HOST_WIDE_INT *offsetp)
14281 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14282 offsetp);
14285 /* Called from a peephole2 expander to turn a sequence of loads into an
14286 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14287 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14288 is true if we can reorder the registers because they are used commutatively
14289 subsequently.
14290 Returns true iff we could generate a new instruction. */
14292 bool
14293 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14295 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14296 rtx mems[MAX_LDM_STM_OPS];
14297 int i, j, base_reg;
14298 rtx base_reg_rtx;
14299 HOST_WIDE_INT offset;
14300 int write_back = FALSE;
14301 int ldm_case;
14302 rtx addr;
14304 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14305 &base_reg, &offset, !sort_regs);
14307 if (ldm_case == 0)
14308 return false;
14310 if (sort_regs)
14311 for (i = 0; i < nops - 1; i++)
14312 for (j = i + 1; j < nops; j++)
14313 if (regs[i] > regs[j])
14315 int t = regs[i];
14316 regs[i] = regs[j];
14317 regs[j] = t;
14319 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14321 if (TARGET_THUMB1)
14323 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14324 gcc_assert (ldm_case == 1 || ldm_case == 5);
14325 write_back = TRUE;
14328 if (ldm_case == 5)
14330 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14331 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14332 offset = 0;
14333 if (!TARGET_THUMB1)
14335 base_reg = regs[0];
14336 base_reg_rtx = newbase;
14340 for (i = 0; i < nops; i++)
14342 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14343 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14344 SImode, addr, 0);
14346 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14347 write_back ? offset + i * 4 : 0));
14348 return true;
14351 /* Called from a peephole2 expander to turn a sequence of stores into an
14352 STM instruction. OPERANDS are the operands found by the peephole matcher;
14353 NOPS indicates how many separate stores we are trying to combine.
14354 Returns true iff we could generate a new instruction. */
14356 bool
14357 gen_stm_seq (rtx *operands, int nops)
14359 int i;
14360 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14361 rtx mems[MAX_LDM_STM_OPS];
14362 int base_reg;
14363 rtx base_reg_rtx;
14364 HOST_WIDE_INT offset;
14365 int write_back = FALSE;
14366 int stm_case;
14367 rtx addr;
14368 bool base_reg_dies;
14370 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14371 mem_order, &base_reg, &offset, true);
14373 if (stm_case == 0)
14374 return false;
14376 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14378 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14379 if (TARGET_THUMB1)
14381 gcc_assert (base_reg_dies);
14382 write_back = TRUE;
14385 if (stm_case == 5)
14387 gcc_assert (base_reg_dies);
14388 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14389 offset = 0;
14392 addr = plus_constant (Pmode, base_reg_rtx, offset);
14394 for (i = 0; i < nops; i++)
14396 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14397 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14398 SImode, addr, 0);
14400 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14401 write_back ? offset + i * 4 : 0));
14402 return true;
14405 /* Called from a peephole2 expander to turn a sequence of stores that are
14406 preceded by constant loads into an STM instruction. OPERANDS are the
14407 operands found by the peephole matcher; NOPS indicates how many
14408 separate stores we are trying to combine; there are 2 * NOPS
14409 instructions in the peephole.
14410 Returns true iff we could generate a new instruction. */
14412 bool
14413 gen_const_stm_seq (rtx *operands, int nops)
14415 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14416 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14417 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14418 rtx mems[MAX_LDM_STM_OPS];
14419 int base_reg;
14420 rtx base_reg_rtx;
14421 HOST_WIDE_INT offset;
14422 int write_back = FALSE;
14423 int stm_case;
14424 rtx addr;
14425 bool base_reg_dies;
14426 int i, j;
14427 HARD_REG_SET allocated;
14429 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14430 mem_order, &base_reg, &offset, false);
14432 if (stm_case == 0)
14433 return false;
14435 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14437 /* If the same register is used more than once, try to find a free
14438 register. */
14439 CLEAR_HARD_REG_SET (allocated);
14440 for (i = 0; i < nops; i++)
14442 for (j = i + 1; j < nops; j++)
14443 if (regs[i] == regs[j])
14445 rtx t = peep2_find_free_register (0, nops * 2,
14446 TARGET_THUMB1 ? "l" : "r",
14447 SImode, &allocated);
14448 if (t == NULL_RTX)
14449 return false;
14450 reg_rtxs[i] = t;
14451 regs[i] = REGNO (t);
14455 /* Compute an ordering that maps the register numbers to an ascending
14456 sequence. */
14457 reg_order[0] = 0;
14458 for (i = 0; i < nops; i++)
14459 if (regs[i] < regs[reg_order[0]])
14460 reg_order[0] = i;
14462 for (i = 1; i < nops; i++)
14464 int this_order = reg_order[i - 1];
14465 for (j = 0; j < nops; j++)
14466 if (regs[j] > regs[reg_order[i - 1]]
14467 && (this_order == reg_order[i - 1]
14468 || regs[j] < regs[this_order]))
14469 this_order = j;
14470 reg_order[i] = this_order;
14473 /* Ensure that registers that must be live after the instruction end
14474 up with the correct value. */
14475 for (i = 0; i < nops; i++)
14477 int this_order = reg_order[i];
14478 if ((this_order != mem_order[i]
14479 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14480 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14481 return false;
14484 /* Load the constants. */
14485 for (i = 0; i < nops; i++)
14487 rtx op = operands[2 * nops + mem_order[i]];
14488 sorted_regs[i] = regs[reg_order[i]];
14489 emit_move_insn (reg_rtxs[reg_order[i]], op);
14492 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14494 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14495 if (TARGET_THUMB1)
14497 gcc_assert (base_reg_dies);
14498 write_back = TRUE;
14501 if (stm_case == 5)
14503 gcc_assert (base_reg_dies);
14504 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14505 offset = 0;
14508 addr = plus_constant (Pmode, base_reg_rtx, offset);
14510 for (i = 0; i < nops; i++)
14512 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14513 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14514 SImode, addr, 0);
14516 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14517 write_back ? offset + i * 4 : 0));
14518 return true;
14521 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14522 unaligned copies on processors which support unaligned semantics for those
14523 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14524 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14525 An interleave factor of 1 (the minimum) will perform no interleaving.
14526 Load/store multiple are used for aligned addresses where possible. */
14528 static void
14529 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14530 HOST_WIDE_INT length,
14531 unsigned int interleave_factor)
14533 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14534 int *regnos = XALLOCAVEC (int, interleave_factor);
14535 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14536 HOST_WIDE_INT i, j;
14537 HOST_WIDE_INT remaining = length, words;
14538 rtx halfword_tmp = NULL, byte_tmp = NULL;
14539 rtx dst, src;
14540 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14541 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14542 HOST_WIDE_INT srcoffset, dstoffset;
14543 HOST_WIDE_INT src_autoinc, dst_autoinc;
14544 rtx mem, addr;
14546 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14548 /* Use hard registers if we have aligned source or destination so we can use
14549 load/store multiple with contiguous registers. */
14550 if (dst_aligned || src_aligned)
14551 for (i = 0; i < interleave_factor; i++)
14552 regs[i] = gen_rtx_REG (SImode, i);
14553 else
14554 for (i = 0; i < interleave_factor; i++)
14555 regs[i] = gen_reg_rtx (SImode);
14557 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14558 src = copy_addr_to_reg (XEXP (srcbase, 0));
14560 srcoffset = dstoffset = 0;
14562 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14563 For copying the last bytes we want to subtract this offset again. */
14564 src_autoinc = dst_autoinc = 0;
14566 for (i = 0; i < interleave_factor; i++)
14567 regnos[i] = i;
14569 /* Copy BLOCK_SIZE_BYTES chunks. */
14571 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14573 /* Load words. */
14574 if (src_aligned && interleave_factor > 1)
14576 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14577 TRUE, srcbase, &srcoffset));
14578 src_autoinc += UNITS_PER_WORD * interleave_factor;
14580 else
14582 for (j = 0; j < interleave_factor; j++)
14584 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14585 - src_autoinc));
14586 mem = adjust_automodify_address (srcbase, SImode, addr,
14587 srcoffset + j * UNITS_PER_WORD);
14588 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14590 srcoffset += block_size_bytes;
14593 /* Store words. */
14594 if (dst_aligned && interleave_factor > 1)
14596 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14597 TRUE, dstbase, &dstoffset));
14598 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14600 else
14602 for (j = 0; j < interleave_factor; j++)
14604 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14605 - dst_autoinc));
14606 mem = adjust_automodify_address (dstbase, SImode, addr,
14607 dstoffset + j * UNITS_PER_WORD);
14608 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14610 dstoffset += block_size_bytes;
14613 remaining -= block_size_bytes;
14616 /* Copy any whole words left (note these aren't interleaved with any
14617 subsequent halfword/byte load/stores in the interests of simplicity). */
14619 words = remaining / UNITS_PER_WORD;
14621 gcc_assert (words < interleave_factor);
14623 if (src_aligned && words > 1)
14625 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14626 &srcoffset));
14627 src_autoinc += UNITS_PER_WORD * words;
14629 else
14631 for (j = 0; j < words; j++)
14633 addr = plus_constant (Pmode, src,
14634 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14635 mem = adjust_automodify_address (srcbase, SImode, addr,
14636 srcoffset + j * UNITS_PER_WORD);
14637 if (src_aligned)
14638 emit_move_insn (regs[j], mem);
14639 else
14640 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14642 srcoffset += words * UNITS_PER_WORD;
14645 if (dst_aligned && words > 1)
14647 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14648 &dstoffset));
14649 dst_autoinc += words * UNITS_PER_WORD;
14651 else
14653 for (j = 0; j < words; j++)
14655 addr = plus_constant (Pmode, dst,
14656 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14657 mem = adjust_automodify_address (dstbase, SImode, addr,
14658 dstoffset + j * UNITS_PER_WORD);
14659 if (dst_aligned)
14660 emit_move_insn (mem, regs[j]);
14661 else
14662 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14664 dstoffset += words * UNITS_PER_WORD;
14667 remaining -= words * UNITS_PER_WORD;
14669 gcc_assert (remaining < 4);
14671 /* Copy a halfword if necessary. */
14673 if (remaining >= 2)
14675 halfword_tmp = gen_reg_rtx (SImode);
14677 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14678 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14679 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14681 /* Either write out immediately, or delay until we've loaded the last
14682 byte, depending on interleave factor. */
14683 if (interleave_factor == 1)
14685 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14686 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14687 emit_insn (gen_unaligned_storehi (mem,
14688 gen_lowpart (HImode, halfword_tmp)));
14689 halfword_tmp = NULL;
14690 dstoffset += 2;
14693 remaining -= 2;
14694 srcoffset += 2;
14697 gcc_assert (remaining < 2);
14699 /* Copy last byte. */
14701 if ((remaining & 1) != 0)
14703 byte_tmp = gen_reg_rtx (SImode);
14705 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14706 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14707 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14709 if (interleave_factor == 1)
14711 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14712 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14713 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14714 byte_tmp = NULL;
14715 dstoffset++;
14718 remaining--;
14719 srcoffset++;
14722 /* Store last halfword if we haven't done so already. */
14724 if (halfword_tmp)
14726 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14727 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14728 emit_insn (gen_unaligned_storehi (mem,
14729 gen_lowpart (HImode, halfword_tmp)));
14730 dstoffset += 2;
14733 /* Likewise for last byte. */
14735 if (byte_tmp)
14737 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14738 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14739 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14740 dstoffset++;
14743 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14746 /* From mips_adjust_block_mem:
14748 Helper function for doing a loop-based block operation on memory
14749 reference MEM. Each iteration of the loop will operate on LENGTH
14750 bytes of MEM.
14752 Create a new base register for use within the loop and point it to
14753 the start of MEM. Create a new memory reference that uses this
14754 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14756 static void
14757 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14758 rtx *loop_mem)
14760 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14762 /* Although the new mem does not refer to a known location,
14763 it does keep up to LENGTH bytes of alignment. */
14764 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14765 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14768 /* From mips_block_move_loop:
14770 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14771 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14772 the memory regions do not overlap. */
14774 static void
14775 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14776 unsigned int interleave_factor,
14777 HOST_WIDE_INT bytes_per_iter)
14779 rtx src_reg, dest_reg, final_src, test;
14780 HOST_WIDE_INT leftover;
14782 leftover = length % bytes_per_iter;
14783 length -= leftover;
14785 /* Create registers and memory references for use within the loop. */
14786 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14787 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14789 /* Calculate the value that SRC_REG should have after the last iteration of
14790 the loop. */
14791 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14792 0, 0, OPTAB_WIDEN);
14794 /* Emit the start of the loop. */
14795 rtx_code_label *label = gen_label_rtx ();
14796 emit_label (label);
14798 /* Emit the loop body. */
14799 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14800 interleave_factor);
14802 /* Move on to the next block. */
14803 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14804 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14806 /* Emit the loop condition. */
14807 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14808 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14810 /* Mop up any left-over bytes. */
14811 if (leftover)
14812 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14815 /* Emit a block move when either the source or destination is unaligned (not
14816 aligned to a four-byte boundary). This may need further tuning depending on
14817 core type, optimize_size setting, etc. */
14819 static int
14820 arm_movmemqi_unaligned (rtx *operands)
14822 HOST_WIDE_INT length = INTVAL (operands[2]);
14824 if (optimize_size)
14826 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14827 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14828 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14829 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14830 or dst_aligned though: allow more interleaving in those cases since the
14831 resulting code can be smaller. */
14832 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14833 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14835 if (length > 12)
14836 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14837 interleave_factor, bytes_per_iter);
14838 else
14839 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14840 interleave_factor);
14842 else
14844 /* Note that the loop created by arm_block_move_unaligned_loop may be
14845 subject to loop unrolling, which makes tuning this condition a little
14846 redundant. */
14847 if (length > 32)
14848 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14849 else
14850 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14853 return 1;
14857 arm_gen_movmemqi (rtx *operands)
14859 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14860 HOST_WIDE_INT srcoffset, dstoffset;
14861 int i;
14862 rtx src, dst, srcbase, dstbase;
14863 rtx part_bytes_reg = NULL;
14864 rtx mem;
14866 if (!CONST_INT_P (operands[2])
14867 || !CONST_INT_P (operands[3])
14868 || INTVAL (operands[2]) > 64)
14869 return 0;
14871 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14872 return arm_movmemqi_unaligned (operands);
14874 if (INTVAL (operands[3]) & 3)
14875 return 0;
14877 dstbase = operands[0];
14878 srcbase = operands[1];
14880 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14881 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14883 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14884 out_words_to_go = INTVAL (operands[2]) / 4;
14885 last_bytes = INTVAL (operands[2]) & 3;
14886 dstoffset = srcoffset = 0;
14888 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14889 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14891 for (i = 0; in_words_to_go >= 2; i+=4)
14893 if (in_words_to_go > 4)
14894 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14895 TRUE, srcbase, &srcoffset));
14896 else
14897 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14898 src, FALSE, srcbase,
14899 &srcoffset));
14901 if (out_words_to_go)
14903 if (out_words_to_go > 4)
14904 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14905 TRUE, dstbase, &dstoffset));
14906 else if (out_words_to_go != 1)
14907 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14908 out_words_to_go, dst,
14909 (last_bytes == 0
14910 ? FALSE : TRUE),
14911 dstbase, &dstoffset));
14912 else
14914 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14915 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14916 if (last_bytes != 0)
14918 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14919 dstoffset += 4;
14924 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14925 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14928 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14929 if (out_words_to_go)
14931 rtx sreg;
14933 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14934 sreg = copy_to_reg (mem);
14936 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14937 emit_move_insn (mem, sreg);
14938 in_words_to_go--;
14940 gcc_assert (!in_words_to_go); /* Sanity check */
14943 if (in_words_to_go)
14945 gcc_assert (in_words_to_go > 0);
14947 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14948 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14951 gcc_assert (!last_bytes || part_bytes_reg);
14953 if (BYTES_BIG_ENDIAN && last_bytes)
14955 rtx tmp = gen_reg_rtx (SImode);
14957 /* The bytes we want are in the top end of the word. */
14958 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14959 GEN_INT (8 * (4 - last_bytes))));
14960 part_bytes_reg = tmp;
14962 while (last_bytes)
14964 mem = adjust_automodify_address (dstbase, QImode,
14965 plus_constant (Pmode, dst,
14966 last_bytes - 1),
14967 dstoffset + last_bytes - 1);
14968 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14970 if (--last_bytes)
14972 tmp = gen_reg_rtx (SImode);
14973 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14974 part_bytes_reg = tmp;
14979 else
14981 if (last_bytes > 1)
14983 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14984 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14985 last_bytes -= 2;
14986 if (last_bytes)
14988 rtx tmp = gen_reg_rtx (SImode);
14989 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14990 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14991 part_bytes_reg = tmp;
14992 dstoffset += 2;
14996 if (last_bytes)
14998 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14999 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
15003 return 1;
15006 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
15007 by mode size. */
15008 inline static rtx
15009 next_consecutive_mem (rtx mem)
15011 machine_mode mode = GET_MODE (mem);
15012 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
15013 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
15015 return adjust_automodify_address (mem, mode, addr, offset);
15018 /* Copy using LDRD/STRD instructions whenever possible.
15019 Returns true upon success. */
15020 bool
15021 gen_movmem_ldrd_strd (rtx *operands)
15023 unsigned HOST_WIDE_INT len;
15024 HOST_WIDE_INT align;
15025 rtx src, dst, base;
15026 rtx reg0;
15027 bool src_aligned, dst_aligned;
15028 bool src_volatile, dst_volatile;
15030 gcc_assert (CONST_INT_P (operands[2]));
15031 gcc_assert (CONST_INT_P (operands[3]));
15033 len = UINTVAL (operands[2]);
15034 if (len > 64)
15035 return false;
15037 /* Maximum alignment we can assume for both src and dst buffers. */
15038 align = INTVAL (operands[3]);
15040 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
15041 return false;
15043 /* Place src and dst addresses in registers
15044 and update the corresponding mem rtx. */
15045 dst = operands[0];
15046 dst_volatile = MEM_VOLATILE_P (dst);
15047 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
15048 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
15049 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
15051 src = operands[1];
15052 src_volatile = MEM_VOLATILE_P (src);
15053 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
15054 base = copy_to_mode_reg (SImode, XEXP (src, 0));
15055 src = adjust_automodify_address (src, VOIDmode, base, 0);
15057 if (!unaligned_access && !(src_aligned && dst_aligned))
15058 return false;
15060 if (src_volatile || dst_volatile)
15061 return false;
15063 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
15064 if (!(dst_aligned || src_aligned))
15065 return arm_gen_movmemqi (operands);
15067 /* If the either src or dst is unaligned we'll be accessing it as pairs
15068 of unaligned SImode accesses. Otherwise we can generate DImode
15069 ldrd/strd instructions. */
15070 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
15071 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
15073 while (len >= 8)
15075 len -= 8;
15076 reg0 = gen_reg_rtx (DImode);
15077 rtx low_reg = NULL_RTX;
15078 rtx hi_reg = NULL_RTX;
15080 if (!src_aligned || !dst_aligned)
15082 low_reg = gen_lowpart (SImode, reg0);
15083 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
15085 if (src_aligned)
15086 emit_move_insn (reg0, src);
15087 else
15089 emit_insn (gen_unaligned_loadsi (low_reg, src));
15090 src = next_consecutive_mem (src);
15091 emit_insn (gen_unaligned_loadsi (hi_reg, src));
15094 if (dst_aligned)
15095 emit_move_insn (dst, reg0);
15096 else
15098 emit_insn (gen_unaligned_storesi (dst, low_reg));
15099 dst = next_consecutive_mem (dst);
15100 emit_insn (gen_unaligned_storesi (dst, hi_reg));
15103 src = next_consecutive_mem (src);
15104 dst = next_consecutive_mem (dst);
15107 gcc_assert (len < 8);
15108 if (len >= 4)
15110 /* More than a word but less than a double-word to copy. Copy a word. */
15111 reg0 = gen_reg_rtx (SImode);
15112 src = adjust_address (src, SImode, 0);
15113 dst = adjust_address (dst, SImode, 0);
15114 if (src_aligned)
15115 emit_move_insn (reg0, src);
15116 else
15117 emit_insn (gen_unaligned_loadsi (reg0, src));
15119 if (dst_aligned)
15120 emit_move_insn (dst, reg0);
15121 else
15122 emit_insn (gen_unaligned_storesi (dst, reg0));
15124 src = next_consecutive_mem (src);
15125 dst = next_consecutive_mem (dst);
15126 len -= 4;
15129 if (len == 0)
15130 return true;
15132 /* Copy the remaining bytes. */
15133 if (len >= 2)
15135 dst = adjust_address (dst, HImode, 0);
15136 src = adjust_address (src, HImode, 0);
15137 reg0 = gen_reg_rtx (SImode);
15138 if (src_aligned)
15139 emit_insn (gen_zero_extendhisi2 (reg0, src));
15140 else
15141 emit_insn (gen_unaligned_loadhiu (reg0, src));
15143 if (dst_aligned)
15144 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
15145 else
15146 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
15148 src = next_consecutive_mem (src);
15149 dst = next_consecutive_mem (dst);
15150 if (len == 2)
15151 return true;
15154 dst = adjust_address (dst, QImode, 0);
15155 src = adjust_address (src, QImode, 0);
15156 reg0 = gen_reg_rtx (QImode);
15157 emit_move_insn (reg0, src);
15158 emit_move_insn (dst, reg0);
15159 return true;
15162 /* Select a dominance comparison mode if possible for a test of the general
15163 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
15164 COND_OR == DOM_CC_X_AND_Y => (X && Y)
15165 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
15166 COND_OR == DOM_CC_X_OR_Y => (X || Y)
15167 In all cases OP will be either EQ or NE, but we don't need to know which
15168 here. If we are unable to support a dominance comparison we return
15169 CC mode. This will then fail to match for the RTL expressions that
15170 generate this call. */
15171 machine_mode
15172 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
15174 enum rtx_code cond1, cond2;
15175 int swapped = 0;
15177 /* Currently we will probably get the wrong result if the individual
15178 comparisons are not simple. This also ensures that it is safe to
15179 reverse a comparison if necessary. */
15180 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
15181 != CCmode)
15182 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
15183 != CCmode))
15184 return CCmode;
15186 /* The if_then_else variant of this tests the second condition if the
15187 first passes, but is true if the first fails. Reverse the first
15188 condition to get a true "inclusive-or" expression. */
15189 if (cond_or == DOM_CC_NX_OR_Y)
15190 cond1 = reverse_condition (cond1);
15192 /* If the comparisons are not equal, and one doesn't dominate the other,
15193 then we can't do this. */
15194 if (cond1 != cond2
15195 && !comparison_dominates_p (cond1, cond2)
15196 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
15197 return CCmode;
15199 if (swapped)
15200 std::swap (cond1, cond2);
15202 switch (cond1)
15204 case EQ:
15205 if (cond_or == DOM_CC_X_AND_Y)
15206 return CC_DEQmode;
15208 switch (cond2)
15210 case EQ: return CC_DEQmode;
15211 case LE: return CC_DLEmode;
15212 case LEU: return CC_DLEUmode;
15213 case GE: return CC_DGEmode;
15214 case GEU: return CC_DGEUmode;
15215 default: gcc_unreachable ();
15218 case LT:
15219 if (cond_or == DOM_CC_X_AND_Y)
15220 return CC_DLTmode;
15222 switch (cond2)
15224 case LT:
15225 return CC_DLTmode;
15226 case LE:
15227 return CC_DLEmode;
15228 case NE:
15229 return CC_DNEmode;
15230 default:
15231 gcc_unreachable ();
15234 case GT:
15235 if (cond_or == DOM_CC_X_AND_Y)
15236 return CC_DGTmode;
15238 switch (cond2)
15240 case GT:
15241 return CC_DGTmode;
15242 case GE:
15243 return CC_DGEmode;
15244 case NE:
15245 return CC_DNEmode;
15246 default:
15247 gcc_unreachable ();
15250 case LTU:
15251 if (cond_or == DOM_CC_X_AND_Y)
15252 return CC_DLTUmode;
15254 switch (cond2)
15256 case LTU:
15257 return CC_DLTUmode;
15258 case LEU:
15259 return CC_DLEUmode;
15260 case NE:
15261 return CC_DNEmode;
15262 default:
15263 gcc_unreachable ();
15266 case GTU:
15267 if (cond_or == DOM_CC_X_AND_Y)
15268 return CC_DGTUmode;
15270 switch (cond2)
15272 case GTU:
15273 return CC_DGTUmode;
15274 case GEU:
15275 return CC_DGEUmode;
15276 case NE:
15277 return CC_DNEmode;
15278 default:
15279 gcc_unreachable ();
15282 /* The remaining cases only occur when both comparisons are the
15283 same. */
15284 case NE:
15285 gcc_assert (cond1 == cond2);
15286 return CC_DNEmode;
15288 case LE:
15289 gcc_assert (cond1 == cond2);
15290 return CC_DLEmode;
15292 case GE:
15293 gcc_assert (cond1 == cond2);
15294 return CC_DGEmode;
15296 case LEU:
15297 gcc_assert (cond1 == cond2);
15298 return CC_DLEUmode;
15300 case GEU:
15301 gcc_assert (cond1 == cond2);
15302 return CC_DGEUmode;
15304 default:
15305 gcc_unreachable ();
15309 machine_mode
15310 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15312 /* All floating point compares return CCFP if it is an equality
15313 comparison, and CCFPE otherwise. */
15314 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15316 switch (op)
15318 case EQ:
15319 case NE:
15320 case UNORDERED:
15321 case ORDERED:
15322 case UNLT:
15323 case UNLE:
15324 case UNGT:
15325 case UNGE:
15326 case UNEQ:
15327 case LTGT:
15328 return CCFPmode;
15330 case LT:
15331 case LE:
15332 case GT:
15333 case GE:
15334 return CCFPEmode;
15336 default:
15337 gcc_unreachable ();
15341 /* A compare with a shifted operand. Because of canonicalization, the
15342 comparison will have to be swapped when we emit the assembler. */
15343 if (GET_MODE (y) == SImode
15344 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15345 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15346 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15347 || GET_CODE (x) == ROTATERT))
15348 return CC_SWPmode;
15350 /* This operation is performed swapped, but since we only rely on the Z
15351 flag we don't need an additional mode. */
15352 if (GET_MODE (y) == SImode
15353 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15354 && GET_CODE (x) == NEG
15355 && (op == EQ || op == NE))
15356 return CC_Zmode;
15358 /* This is a special case that is used by combine to allow a
15359 comparison of a shifted byte load to be split into a zero-extend
15360 followed by a comparison of the shifted integer (only valid for
15361 equalities and unsigned inequalities). */
15362 if (GET_MODE (x) == SImode
15363 && GET_CODE (x) == ASHIFT
15364 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15365 && GET_CODE (XEXP (x, 0)) == SUBREG
15366 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15367 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15368 && (op == EQ || op == NE
15369 || op == GEU || op == GTU || op == LTU || op == LEU)
15370 && CONST_INT_P (y))
15371 return CC_Zmode;
15373 /* A construct for a conditional compare, if the false arm contains
15374 0, then both conditions must be true, otherwise either condition
15375 must be true. Not all conditions are possible, so CCmode is
15376 returned if it can't be done. */
15377 if (GET_CODE (x) == IF_THEN_ELSE
15378 && (XEXP (x, 2) == const0_rtx
15379 || XEXP (x, 2) == const1_rtx)
15380 && COMPARISON_P (XEXP (x, 0))
15381 && COMPARISON_P (XEXP (x, 1)))
15382 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15383 INTVAL (XEXP (x, 2)));
15385 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15386 if (GET_CODE (x) == AND
15387 && (op == EQ || op == NE)
15388 && COMPARISON_P (XEXP (x, 0))
15389 && COMPARISON_P (XEXP (x, 1)))
15390 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15391 DOM_CC_X_AND_Y);
15393 if (GET_CODE (x) == IOR
15394 && (op == EQ || op == NE)
15395 && COMPARISON_P (XEXP (x, 0))
15396 && COMPARISON_P (XEXP (x, 1)))
15397 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15398 DOM_CC_X_OR_Y);
15400 /* An operation (on Thumb) where we want to test for a single bit.
15401 This is done by shifting that bit up into the top bit of a
15402 scratch register; we can then branch on the sign bit. */
15403 if (TARGET_THUMB1
15404 && GET_MODE (x) == SImode
15405 && (op == EQ || op == NE)
15406 && GET_CODE (x) == ZERO_EXTRACT
15407 && XEXP (x, 1) == const1_rtx)
15408 return CC_Nmode;
15410 /* An operation that sets the condition codes as a side-effect, the
15411 V flag is not set correctly, so we can only use comparisons where
15412 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15413 instead.) */
15414 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15415 if (GET_MODE (x) == SImode
15416 && y == const0_rtx
15417 && (op == EQ || op == NE || op == LT || op == GE)
15418 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15419 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15420 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15421 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15422 || GET_CODE (x) == LSHIFTRT
15423 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15424 || GET_CODE (x) == ROTATERT
15425 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15426 return CC_NOOVmode;
15428 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15429 return CC_Zmode;
15431 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15432 && GET_CODE (x) == PLUS
15433 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15434 return CC_Cmode;
15436 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15438 switch (op)
15440 case EQ:
15441 case NE:
15442 /* A DImode comparison against zero can be implemented by
15443 or'ing the two halves together. */
15444 if (y == const0_rtx)
15445 return CC_Zmode;
15447 /* We can do an equality test in three Thumb instructions. */
15448 if (!TARGET_32BIT)
15449 return CC_Zmode;
15451 /* FALLTHROUGH */
15453 case LTU:
15454 case LEU:
15455 case GTU:
15456 case GEU:
15457 /* DImode unsigned comparisons can be implemented by cmp +
15458 cmpeq without a scratch register. Not worth doing in
15459 Thumb-2. */
15460 if (TARGET_32BIT)
15461 return CC_CZmode;
15463 /* FALLTHROUGH */
15465 case LT:
15466 case LE:
15467 case GT:
15468 case GE:
15469 /* DImode signed and unsigned comparisons can be implemented
15470 by cmp + sbcs with a scratch register, but that does not
15471 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15472 gcc_assert (op != EQ && op != NE);
15473 return CC_NCVmode;
15475 default:
15476 gcc_unreachable ();
15480 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15481 return GET_MODE (x);
15483 return CCmode;
15486 /* X and Y are two things to compare using CODE. Emit the compare insn and
15487 return the rtx for register 0 in the proper mode. FP means this is a
15488 floating point compare: I don't think that it is needed on the arm. */
15490 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15492 machine_mode mode;
15493 rtx cc_reg;
15494 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15496 /* We might have X as a constant, Y as a register because of the predicates
15497 used for cmpdi. If so, force X to a register here. */
15498 if (dimode_comparison && !REG_P (x))
15499 x = force_reg (DImode, x);
15501 mode = SELECT_CC_MODE (code, x, y);
15502 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15504 if (dimode_comparison
15505 && mode != CC_CZmode)
15507 rtx clobber, set;
15509 /* To compare two non-zero values for equality, XOR them and
15510 then compare against zero. Not used for ARM mode; there
15511 CC_CZmode is cheaper. */
15512 if (mode == CC_Zmode && y != const0_rtx)
15514 gcc_assert (!reload_completed);
15515 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15516 y = const0_rtx;
15519 /* A scratch register is required. */
15520 if (reload_completed)
15521 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15522 else
15523 scratch = gen_rtx_SCRATCH (SImode);
15525 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15526 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15527 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15529 else
15530 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15532 return cc_reg;
15535 /* Generate a sequence of insns that will generate the correct return
15536 address mask depending on the physical architecture that the program
15537 is running on. */
15539 arm_gen_return_addr_mask (void)
15541 rtx reg = gen_reg_rtx (Pmode);
15543 emit_insn (gen_return_addr_mask (reg));
15544 return reg;
15547 void
15548 arm_reload_in_hi (rtx *operands)
15550 rtx ref = operands[1];
15551 rtx base, scratch;
15552 HOST_WIDE_INT offset = 0;
15554 if (GET_CODE (ref) == SUBREG)
15556 offset = SUBREG_BYTE (ref);
15557 ref = SUBREG_REG (ref);
15560 if (REG_P (ref))
15562 /* We have a pseudo which has been spilt onto the stack; there
15563 are two cases here: the first where there is a simple
15564 stack-slot replacement and a second where the stack-slot is
15565 out of range, or is used as a subreg. */
15566 if (reg_equiv_mem (REGNO (ref)))
15568 ref = reg_equiv_mem (REGNO (ref));
15569 base = find_replacement (&XEXP (ref, 0));
15571 else
15572 /* The slot is out of range, or was dressed up in a SUBREG. */
15573 base = reg_equiv_address (REGNO (ref));
15575 /* PR 62554: If there is no equivalent memory location then just move
15576 the value as an SImode register move. This happens when the target
15577 architecture variant does not have an HImode register move. */
15578 if (base == NULL)
15580 gcc_assert (REG_P (operands[0]));
15581 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15582 gen_rtx_SUBREG (SImode, ref, 0)));
15583 return;
15586 else
15587 base = find_replacement (&XEXP (ref, 0));
15589 /* Handle the case where the address is too complex to be offset by 1. */
15590 if (GET_CODE (base) == MINUS
15591 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15593 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15595 emit_set_insn (base_plus, base);
15596 base = base_plus;
15598 else if (GET_CODE (base) == PLUS)
15600 /* The addend must be CONST_INT, or we would have dealt with it above. */
15601 HOST_WIDE_INT hi, lo;
15603 offset += INTVAL (XEXP (base, 1));
15604 base = XEXP (base, 0);
15606 /* Rework the address into a legal sequence of insns. */
15607 /* Valid range for lo is -4095 -> 4095 */
15608 lo = (offset >= 0
15609 ? (offset & 0xfff)
15610 : -((-offset) & 0xfff));
15612 /* Corner case, if lo is the max offset then we would be out of range
15613 once we have added the additional 1 below, so bump the msb into the
15614 pre-loading insn(s). */
15615 if (lo == 4095)
15616 lo &= 0x7ff;
15618 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15619 ^ (HOST_WIDE_INT) 0x80000000)
15620 - (HOST_WIDE_INT) 0x80000000);
15622 gcc_assert (hi + lo == offset);
15624 if (hi != 0)
15626 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15628 /* Get the base address; addsi3 knows how to handle constants
15629 that require more than one insn. */
15630 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15631 base = base_plus;
15632 offset = lo;
15636 /* Operands[2] may overlap operands[0] (though it won't overlap
15637 operands[1]), that's why we asked for a DImode reg -- so we can
15638 use the bit that does not overlap. */
15639 if (REGNO (operands[2]) == REGNO (operands[0]))
15640 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15641 else
15642 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15644 emit_insn (gen_zero_extendqisi2 (scratch,
15645 gen_rtx_MEM (QImode,
15646 plus_constant (Pmode, base,
15647 offset))));
15648 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15649 gen_rtx_MEM (QImode,
15650 plus_constant (Pmode, base,
15651 offset + 1))));
15652 if (!BYTES_BIG_ENDIAN)
15653 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15654 gen_rtx_IOR (SImode,
15655 gen_rtx_ASHIFT
15656 (SImode,
15657 gen_rtx_SUBREG (SImode, operands[0], 0),
15658 GEN_INT (8)),
15659 scratch));
15660 else
15661 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15662 gen_rtx_IOR (SImode,
15663 gen_rtx_ASHIFT (SImode, scratch,
15664 GEN_INT (8)),
15665 gen_rtx_SUBREG (SImode, operands[0], 0)));
15668 /* Handle storing a half-word to memory during reload by synthesizing as two
15669 byte stores. Take care not to clobber the input values until after we
15670 have moved them somewhere safe. This code assumes that if the DImode
15671 scratch in operands[2] overlaps either the input value or output address
15672 in some way, then that value must die in this insn (we absolutely need
15673 two scratch registers for some corner cases). */
15674 void
15675 arm_reload_out_hi (rtx *operands)
15677 rtx ref = operands[0];
15678 rtx outval = operands[1];
15679 rtx base, scratch;
15680 HOST_WIDE_INT offset = 0;
15682 if (GET_CODE (ref) == SUBREG)
15684 offset = SUBREG_BYTE (ref);
15685 ref = SUBREG_REG (ref);
15688 if (REG_P (ref))
15690 /* We have a pseudo which has been spilt onto the stack; there
15691 are two cases here: the first where there is a simple
15692 stack-slot replacement and a second where the stack-slot is
15693 out of range, or is used as a subreg. */
15694 if (reg_equiv_mem (REGNO (ref)))
15696 ref = reg_equiv_mem (REGNO (ref));
15697 base = find_replacement (&XEXP (ref, 0));
15699 else
15700 /* The slot is out of range, or was dressed up in a SUBREG. */
15701 base = reg_equiv_address (REGNO (ref));
15703 /* PR 62254: If there is no equivalent memory location then just move
15704 the value as an SImode register move. This happens when the target
15705 architecture variant does not have an HImode register move. */
15706 if (base == NULL)
15708 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15710 if (REG_P (outval))
15712 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15713 gen_rtx_SUBREG (SImode, outval, 0)));
15715 else /* SUBREG_P (outval) */
15717 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15718 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15719 SUBREG_REG (outval)));
15720 else
15721 /* FIXME: Handle other cases ? */
15722 gcc_unreachable ();
15724 return;
15727 else
15728 base = find_replacement (&XEXP (ref, 0));
15730 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15732 /* Handle the case where the address is too complex to be offset by 1. */
15733 if (GET_CODE (base) == MINUS
15734 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15736 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15738 /* Be careful not to destroy OUTVAL. */
15739 if (reg_overlap_mentioned_p (base_plus, outval))
15741 /* Updating base_plus might destroy outval, see if we can
15742 swap the scratch and base_plus. */
15743 if (!reg_overlap_mentioned_p (scratch, outval))
15744 std::swap (scratch, base_plus);
15745 else
15747 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15749 /* Be conservative and copy OUTVAL into the scratch now,
15750 this should only be necessary if outval is a subreg
15751 of something larger than a word. */
15752 /* XXX Might this clobber base? I can't see how it can,
15753 since scratch is known to overlap with OUTVAL, and
15754 must be wider than a word. */
15755 emit_insn (gen_movhi (scratch_hi, outval));
15756 outval = scratch_hi;
15760 emit_set_insn (base_plus, base);
15761 base = base_plus;
15763 else if (GET_CODE (base) == PLUS)
15765 /* The addend must be CONST_INT, or we would have dealt with it above. */
15766 HOST_WIDE_INT hi, lo;
15768 offset += INTVAL (XEXP (base, 1));
15769 base = XEXP (base, 0);
15771 /* Rework the address into a legal sequence of insns. */
15772 /* Valid range for lo is -4095 -> 4095 */
15773 lo = (offset >= 0
15774 ? (offset & 0xfff)
15775 : -((-offset) & 0xfff));
15777 /* Corner case, if lo is the max offset then we would be out of range
15778 once we have added the additional 1 below, so bump the msb into the
15779 pre-loading insn(s). */
15780 if (lo == 4095)
15781 lo &= 0x7ff;
15783 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15784 ^ (HOST_WIDE_INT) 0x80000000)
15785 - (HOST_WIDE_INT) 0x80000000);
15787 gcc_assert (hi + lo == offset);
15789 if (hi != 0)
15791 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15793 /* Be careful not to destroy OUTVAL. */
15794 if (reg_overlap_mentioned_p (base_plus, outval))
15796 /* Updating base_plus might destroy outval, see if we
15797 can swap the scratch and base_plus. */
15798 if (!reg_overlap_mentioned_p (scratch, outval))
15799 std::swap (scratch, base_plus);
15800 else
15802 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15804 /* Be conservative and copy outval into scratch now,
15805 this should only be necessary if outval is a
15806 subreg of something larger than a word. */
15807 /* XXX Might this clobber base? I can't see how it
15808 can, since scratch is known to overlap with
15809 outval. */
15810 emit_insn (gen_movhi (scratch_hi, outval));
15811 outval = scratch_hi;
15815 /* Get the base address; addsi3 knows how to handle constants
15816 that require more than one insn. */
15817 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15818 base = base_plus;
15819 offset = lo;
15823 if (BYTES_BIG_ENDIAN)
15825 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15826 plus_constant (Pmode, base,
15827 offset + 1)),
15828 gen_lowpart (QImode, outval)));
15829 emit_insn (gen_lshrsi3 (scratch,
15830 gen_rtx_SUBREG (SImode, outval, 0),
15831 GEN_INT (8)));
15832 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15833 offset)),
15834 gen_lowpart (QImode, scratch)));
15836 else
15838 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15839 offset)),
15840 gen_lowpart (QImode, outval)));
15841 emit_insn (gen_lshrsi3 (scratch,
15842 gen_rtx_SUBREG (SImode, outval, 0),
15843 GEN_INT (8)));
15844 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15845 plus_constant (Pmode, base,
15846 offset + 1)),
15847 gen_lowpart (QImode, scratch)));
15851 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15852 (padded to the size of a word) should be passed in a register. */
15854 static bool
15855 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15857 if (TARGET_AAPCS_BASED)
15858 return must_pass_in_stack_var_size (mode, type);
15859 else
15860 return must_pass_in_stack_var_size_or_pad (mode, type);
15864 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15865 Return true if an argument passed on the stack should be padded upwards,
15866 i.e. if the least-significant byte has useful data.
15867 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15868 aggregate types are placed in the lowest memory address. */
15870 bool
15871 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15873 if (!TARGET_AAPCS_BASED)
15874 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15876 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15877 return false;
15879 return true;
15883 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15884 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15885 register has useful data, and return the opposite if the most
15886 significant byte does. */
15888 bool
15889 arm_pad_reg_upward (machine_mode mode,
15890 tree type, int first ATTRIBUTE_UNUSED)
15892 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15894 /* For AAPCS, small aggregates, small fixed-point types,
15895 and small complex types are always padded upwards. */
15896 if (type)
15898 if ((AGGREGATE_TYPE_P (type)
15899 || TREE_CODE (type) == COMPLEX_TYPE
15900 || FIXED_POINT_TYPE_P (type))
15901 && int_size_in_bytes (type) <= 4)
15902 return true;
15904 else
15906 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15907 && GET_MODE_SIZE (mode) <= 4)
15908 return true;
15912 /* Otherwise, use default padding. */
15913 return !BYTES_BIG_ENDIAN;
15916 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15917 assuming that the address in the base register is word aligned. */
15918 bool
15919 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15921 HOST_WIDE_INT max_offset;
15923 /* Offset must be a multiple of 4 in Thumb mode. */
15924 if (TARGET_THUMB2 && ((offset & 3) != 0))
15925 return false;
15927 if (TARGET_THUMB2)
15928 max_offset = 1020;
15929 else if (TARGET_ARM)
15930 max_offset = 255;
15931 else
15932 return false;
15934 return ((offset <= max_offset) && (offset >= -max_offset));
15937 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15938 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15939 Assumes that the address in the base register RN is word aligned. Pattern
15940 guarantees that both memory accesses use the same base register,
15941 the offsets are constants within the range, and the gap between the offsets is 4.
15942 If preload complete then check that registers are legal. WBACK indicates whether
15943 address is updated. LOAD indicates whether memory access is load or store. */
15944 bool
15945 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15946 bool wback, bool load)
15948 unsigned int t, t2, n;
15950 if (!reload_completed)
15951 return true;
15953 if (!offset_ok_for_ldrd_strd (offset))
15954 return false;
15956 t = REGNO (rt);
15957 t2 = REGNO (rt2);
15958 n = REGNO (rn);
15960 if ((TARGET_THUMB2)
15961 && ((wback && (n == t || n == t2))
15962 || (t == SP_REGNUM)
15963 || (t == PC_REGNUM)
15964 || (t2 == SP_REGNUM)
15965 || (t2 == PC_REGNUM)
15966 || (!load && (n == PC_REGNUM))
15967 || (load && (t == t2))
15968 /* Triggers Cortex-M3 LDRD errata. */
15969 || (!wback && load && fix_cm3_ldrd && (n == t))))
15970 return false;
15972 if ((TARGET_ARM)
15973 && ((wback && (n == t || n == t2))
15974 || (t2 == PC_REGNUM)
15975 || (t % 2 != 0) /* First destination register is not even. */
15976 || (t2 != t + 1)
15977 /* PC can be used as base register (for offset addressing only),
15978 but it is depricated. */
15979 || (n == PC_REGNUM)))
15980 return false;
15982 return true;
15985 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15986 operand MEM's address contains an immediate offset from the base
15987 register and has no side effects, in which case it sets BASE and
15988 OFFSET accordingly. */
15989 static bool
15990 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15992 rtx addr;
15994 gcc_assert (base != NULL && offset != NULL);
15996 /* TODO: Handle more general memory operand patterns, such as
15997 PRE_DEC and PRE_INC. */
15999 if (side_effects_p (mem))
16000 return false;
16002 /* Can't deal with subregs. */
16003 if (GET_CODE (mem) == SUBREG)
16004 return false;
16006 gcc_assert (MEM_P (mem));
16008 *offset = const0_rtx;
16010 addr = XEXP (mem, 0);
16012 /* If addr isn't valid for DImode, then we can't handle it. */
16013 if (!arm_legitimate_address_p (DImode, addr,
16014 reload_in_progress || reload_completed))
16015 return false;
16017 if (REG_P (addr))
16019 *base = addr;
16020 return true;
16022 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
16024 *base = XEXP (addr, 0);
16025 *offset = XEXP (addr, 1);
16026 return (REG_P (*base) && CONST_INT_P (*offset));
16029 return false;
16032 /* Called from a peephole2 to replace two word-size accesses with a
16033 single LDRD/STRD instruction. Returns true iff we can generate a
16034 new instruction sequence. That is, both accesses use the same base
16035 register and the gap between constant offsets is 4. This function
16036 may reorder its operands to match ldrd/strd RTL templates.
16037 OPERANDS are the operands found by the peephole matcher;
16038 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
16039 corresponding memory operands. LOAD indicaates whether the access
16040 is load or store. CONST_STORE indicates a store of constant
16041 integer values held in OPERANDS[4,5] and assumes that the pattern
16042 is of length 4 insn, for the purpose of checking dead registers.
16043 COMMUTE indicates that register operands may be reordered. */
16044 bool
16045 gen_operands_ldrd_strd (rtx *operands, bool load,
16046 bool const_store, bool commute)
16048 int nops = 2;
16049 HOST_WIDE_INT offsets[2], offset;
16050 rtx base = NULL_RTX;
16051 rtx cur_base, cur_offset, tmp;
16052 int i, gap;
16053 HARD_REG_SET regset;
16055 gcc_assert (!const_store || !load);
16056 /* Check that the memory references are immediate offsets from the
16057 same base register. Extract the base register, the destination
16058 registers, and the corresponding memory offsets. */
16059 for (i = 0; i < nops; i++)
16061 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
16062 return false;
16064 if (i == 0)
16065 base = cur_base;
16066 else if (REGNO (base) != REGNO (cur_base))
16067 return false;
16069 offsets[i] = INTVAL (cur_offset);
16070 if (GET_CODE (operands[i]) == SUBREG)
16072 tmp = SUBREG_REG (operands[i]);
16073 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
16074 operands[i] = tmp;
16078 /* Make sure there is no dependency between the individual loads. */
16079 if (load && REGNO (operands[0]) == REGNO (base))
16080 return false; /* RAW */
16082 if (load && REGNO (operands[0]) == REGNO (operands[1]))
16083 return false; /* WAW */
16085 /* If the same input register is used in both stores
16086 when storing different constants, try to find a free register.
16087 For example, the code
16088 mov r0, 0
16089 str r0, [r2]
16090 mov r0, 1
16091 str r0, [r2, #4]
16092 can be transformed into
16093 mov r1, 0
16094 mov r0, 1
16095 strd r1, r0, [r2]
16096 in Thumb mode assuming that r1 is free.
16097 For ARM mode do the same but only if the starting register
16098 can be made to be even. */
16099 if (const_store
16100 && REGNO (operands[0]) == REGNO (operands[1])
16101 && INTVAL (operands[4]) != INTVAL (operands[5]))
16103 if (TARGET_THUMB2)
16105 CLEAR_HARD_REG_SET (regset);
16106 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16107 if (tmp == NULL_RTX)
16108 return false;
16110 /* Use the new register in the first load to ensure that
16111 if the original input register is not dead after peephole,
16112 then it will have the correct constant value. */
16113 operands[0] = tmp;
16115 else if (TARGET_ARM)
16117 int regno = REGNO (operands[0]);
16118 if (!peep2_reg_dead_p (4, operands[0]))
16120 /* When the input register is even and is not dead after the
16121 pattern, it has to hold the second constant but we cannot
16122 form a legal STRD in ARM mode with this register as the second
16123 register. */
16124 if (regno % 2 == 0)
16125 return false;
16127 /* Is regno-1 free? */
16128 SET_HARD_REG_SET (regset);
16129 CLEAR_HARD_REG_BIT(regset, regno - 1);
16130 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16131 if (tmp == NULL_RTX)
16132 return false;
16134 operands[0] = tmp;
16136 else
16138 /* Find a DImode register. */
16139 CLEAR_HARD_REG_SET (regset);
16140 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16141 if (tmp != NULL_RTX)
16143 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16144 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16146 else
16148 /* Can we use the input register to form a DI register? */
16149 SET_HARD_REG_SET (regset);
16150 CLEAR_HARD_REG_BIT(regset,
16151 regno % 2 == 0 ? regno + 1 : regno - 1);
16152 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
16153 if (tmp == NULL_RTX)
16154 return false;
16155 operands[regno % 2 == 1 ? 0 : 1] = tmp;
16159 gcc_assert (operands[0] != NULL_RTX);
16160 gcc_assert (operands[1] != NULL_RTX);
16161 gcc_assert (REGNO (operands[0]) % 2 == 0);
16162 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
16166 /* Make sure the instructions are ordered with lower memory access first. */
16167 if (offsets[0] > offsets[1])
16169 gap = offsets[0] - offsets[1];
16170 offset = offsets[1];
16172 /* Swap the instructions such that lower memory is accessed first. */
16173 std::swap (operands[0], operands[1]);
16174 std::swap (operands[2], operands[3]);
16175 if (const_store)
16176 std::swap (operands[4], operands[5]);
16178 else
16180 gap = offsets[1] - offsets[0];
16181 offset = offsets[0];
16184 /* Make sure accesses are to consecutive memory locations. */
16185 if (gap != 4)
16186 return false;
16188 /* Make sure we generate legal instructions. */
16189 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16190 false, load))
16191 return true;
16193 /* In Thumb state, where registers are almost unconstrained, there
16194 is little hope to fix it. */
16195 if (TARGET_THUMB2)
16196 return false;
16198 if (load && commute)
16200 /* Try reordering registers. */
16201 std::swap (operands[0], operands[1]);
16202 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
16203 false, load))
16204 return true;
16207 if (const_store)
16209 /* If input registers are dead after this pattern, they can be
16210 reordered or replaced by other registers that are free in the
16211 current pattern. */
16212 if (!peep2_reg_dead_p (4, operands[0])
16213 || !peep2_reg_dead_p (4, operands[1]))
16214 return false;
16216 /* Try to reorder the input registers. */
16217 /* For example, the code
16218 mov r0, 0
16219 mov r1, 1
16220 str r1, [r2]
16221 str r0, [r2, #4]
16222 can be transformed into
16223 mov r1, 0
16224 mov r0, 1
16225 strd r0, [r2]
16227 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
16228 false, false))
16230 std::swap (operands[0], operands[1]);
16231 return true;
16234 /* Try to find a free DI register. */
16235 CLEAR_HARD_REG_SET (regset);
16236 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
16237 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
16238 while (true)
16240 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
16241 if (tmp == NULL_RTX)
16242 return false;
16244 /* DREG must be an even-numbered register in DImode.
16245 Split it into SI registers. */
16246 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16247 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16248 gcc_assert (operands[0] != NULL_RTX);
16249 gcc_assert (operands[1] != NULL_RTX);
16250 gcc_assert (REGNO (operands[0]) % 2 == 0);
16251 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16253 return (operands_ok_ldrd_strd (operands[0], operands[1],
16254 base, offset,
16255 false, load));
16259 return false;
16265 /* Print a symbolic form of X to the debug file, F. */
16266 static void
16267 arm_print_value (FILE *f, rtx x)
16269 switch (GET_CODE (x))
16271 case CONST_INT:
16272 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16273 return;
16275 case CONST_DOUBLE:
16276 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16277 return;
16279 case CONST_VECTOR:
16281 int i;
16283 fprintf (f, "<");
16284 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16286 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16287 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16288 fputc (',', f);
16290 fprintf (f, ">");
16292 return;
16294 case CONST_STRING:
16295 fprintf (f, "\"%s\"", XSTR (x, 0));
16296 return;
16298 case SYMBOL_REF:
16299 fprintf (f, "`%s'", XSTR (x, 0));
16300 return;
16302 case LABEL_REF:
16303 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16304 return;
16306 case CONST:
16307 arm_print_value (f, XEXP (x, 0));
16308 return;
16310 case PLUS:
16311 arm_print_value (f, XEXP (x, 0));
16312 fprintf (f, "+");
16313 arm_print_value (f, XEXP (x, 1));
16314 return;
16316 case PC:
16317 fprintf (f, "pc");
16318 return;
16320 default:
16321 fprintf (f, "????");
16322 return;
16326 /* Routines for manipulation of the constant pool. */
16328 /* Arm instructions cannot load a large constant directly into a
16329 register; they have to come from a pc relative load. The constant
16330 must therefore be placed in the addressable range of the pc
16331 relative load. Depending on the precise pc relative load
16332 instruction the range is somewhere between 256 bytes and 4k. This
16333 means that we often have to dump a constant inside a function, and
16334 generate code to branch around it.
16336 It is important to minimize this, since the branches will slow
16337 things down and make the code larger.
16339 Normally we can hide the table after an existing unconditional
16340 branch so that there is no interruption of the flow, but in the
16341 worst case the code looks like this:
16343 ldr rn, L1
16345 b L2
16346 align
16347 L1: .long value
16351 ldr rn, L3
16353 b L4
16354 align
16355 L3: .long value
16359 We fix this by performing a scan after scheduling, which notices
16360 which instructions need to have their operands fetched from the
16361 constant table and builds the table.
16363 The algorithm starts by building a table of all the constants that
16364 need fixing up and all the natural barriers in the function (places
16365 where a constant table can be dropped without breaking the flow).
16366 For each fixup we note how far the pc-relative replacement will be
16367 able to reach and the offset of the instruction into the function.
16369 Having built the table we then group the fixes together to form
16370 tables that are as large as possible (subject to addressing
16371 constraints) and emit each table of constants after the last
16372 barrier that is within range of all the instructions in the group.
16373 If a group does not contain a barrier, then we forcibly create one
16374 by inserting a jump instruction into the flow. Once the table has
16375 been inserted, the insns are then modified to reference the
16376 relevant entry in the pool.
16378 Possible enhancements to the algorithm (not implemented) are:
16380 1) For some processors and object formats, there may be benefit in
16381 aligning the pools to the start of cache lines; this alignment
16382 would need to be taken into account when calculating addressability
16383 of a pool. */
16385 /* These typedefs are located at the start of this file, so that
16386 they can be used in the prototypes there. This comment is to
16387 remind readers of that fact so that the following structures
16388 can be understood more easily.
16390 typedef struct minipool_node Mnode;
16391 typedef struct minipool_fixup Mfix; */
16393 struct minipool_node
16395 /* Doubly linked chain of entries. */
16396 Mnode * next;
16397 Mnode * prev;
16398 /* The maximum offset into the code that this entry can be placed. While
16399 pushing fixes for forward references, all entries are sorted in order
16400 of increasing max_address. */
16401 HOST_WIDE_INT max_address;
16402 /* Similarly for an entry inserted for a backwards ref. */
16403 HOST_WIDE_INT min_address;
16404 /* The number of fixes referencing this entry. This can become zero
16405 if we "unpush" an entry. In this case we ignore the entry when we
16406 come to emit the code. */
16407 int refcount;
16408 /* The offset from the start of the minipool. */
16409 HOST_WIDE_INT offset;
16410 /* The value in table. */
16411 rtx value;
16412 /* The mode of value. */
16413 machine_mode mode;
16414 /* The size of the value. With iWMMXt enabled
16415 sizes > 4 also imply an alignment of 8-bytes. */
16416 int fix_size;
16419 struct minipool_fixup
16421 Mfix * next;
16422 rtx_insn * insn;
16423 HOST_WIDE_INT address;
16424 rtx * loc;
16425 machine_mode mode;
16426 int fix_size;
16427 rtx value;
16428 Mnode * minipool;
16429 HOST_WIDE_INT forwards;
16430 HOST_WIDE_INT backwards;
16433 /* Fixes less than a word need padding out to a word boundary. */
16434 #define MINIPOOL_FIX_SIZE(mode) \
16435 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16437 static Mnode * minipool_vector_head;
16438 static Mnode * minipool_vector_tail;
16439 static rtx_code_label *minipool_vector_label;
16440 static int minipool_pad;
16442 /* The linked list of all minipool fixes required for this function. */
16443 Mfix * minipool_fix_head;
16444 Mfix * minipool_fix_tail;
16445 /* The fix entry for the current minipool, once it has been placed. */
16446 Mfix * minipool_barrier;
16448 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16449 #define JUMP_TABLES_IN_TEXT_SECTION 0
16450 #endif
16452 static HOST_WIDE_INT
16453 get_jump_table_size (rtx_jump_table_data *insn)
16455 /* ADDR_VECs only take room if read-only data does into the text
16456 section. */
16457 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16459 rtx body = PATTERN (insn);
16460 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16461 HOST_WIDE_INT size;
16462 HOST_WIDE_INT modesize;
16464 modesize = GET_MODE_SIZE (GET_MODE (body));
16465 size = modesize * XVECLEN (body, elt);
16466 switch (modesize)
16468 case 1:
16469 /* Round up size of TBB table to a halfword boundary. */
16470 size = (size + 1) & ~HOST_WIDE_INT_1;
16471 break;
16472 case 2:
16473 /* No padding necessary for TBH. */
16474 break;
16475 case 4:
16476 /* Add two bytes for alignment on Thumb. */
16477 if (TARGET_THUMB)
16478 size += 2;
16479 break;
16480 default:
16481 gcc_unreachable ();
16483 return size;
16486 return 0;
16489 /* Return the maximum amount of padding that will be inserted before
16490 label LABEL. */
16492 static HOST_WIDE_INT
16493 get_label_padding (rtx label)
16495 HOST_WIDE_INT align, min_insn_size;
16497 align = 1 << label_to_alignment (label);
16498 min_insn_size = TARGET_THUMB ? 2 : 4;
16499 return align > min_insn_size ? align - min_insn_size : 0;
16502 /* Move a minipool fix MP from its current location to before MAX_MP.
16503 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16504 constraints may need updating. */
16505 static Mnode *
16506 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16507 HOST_WIDE_INT max_address)
16509 /* The code below assumes these are different. */
16510 gcc_assert (mp != max_mp);
16512 if (max_mp == NULL)
16514 if (max_address < mp->max_address)
16515 mp->max_address = max_address;
16517 else
16519 if (max_address > max_mp->max_address - mp->fix_size)
16520 mp->max_address = max_mp->max_address - mp->fix_size;
16521 else
16522 mp->max_address = max_address;
16524 /* Unlink MP from its current position. Since max_mp is non-null,
16525 mp->prev must be non-null. */
16526 mp->prev->next = mp->next;
16527 if (mp->next != NULL)
16528 mp->next->prev = mp->prev;
16529 else
16530 minipool_vector_tail = mp->prev;
16532 /* Re-insert it before MAX_MP. */
16533 mp->next = max_mp;
16534 mp->prev = max_mp->prev;
16535 max_mp->prev = mp;
16537 if (mp->prev != NULL)
16538 mp->prev->next = mp;
16539 else
16540 minipool_vector_head = mp;
16543 /* Save the new entry. */
16544 max_mp = mp;
16546 /* Scan over the preceding entries and adjust their addresses as
16547 required. */
16548 while (mp->prev != NULL
16549 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16551 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16552 mp = mp->prev;
16555 return max_mp;
16558 /* Add a constant to the minipool for a forward reference. Returns the
16559 node added or NULL if the constant will not fit in this pool. */
16560 static Mnode *
16561 add_minipool_forward_ref (Mfix *fix)
16563 /* If set, max_mp is the first pool_entry that has a lower
16564 constraint than the one we are trying to add. */
16565 Mnode * max_mp = NULL;
16566 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16567 Mnode * mp;
16569 /* If the minipool starts before the end of FIX->INSN then this FIX
16570 can not be placed into the current pool. Furthermore, adding the
16571 new constant pool entry may cause the pool to start FIX_SIZE bytes
16572 earlier. */
16573 if (minipool_vector_head &&
16574 (fix->address + get_attr_length (fix->insn)
16575 >= minipool_vector_head->max_address - fix->fix_size))
16576 return NULL;
16578 /* Scan the pool to see if a constant with the same value has
16579 already been added. While we are doing this, also note the
16580 location where we must insert the constant if it doesn't already
16581 exist. */
16582 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16584 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16585 && fix->mode == mp->mode
16586 && (!LABEL_P (fix->value)
16587 || (CODE_LABEL_NUMBER (fix->value)
16588 == CODE_LABEL_NUMBER (mp->value)))
16589 && rtx_equal_p (fix->value, mp->value))
16591 /* More than one fix references this entry. */
16592 mp->refcount++;
16593 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16596 /* Note the insertion point if necessary. */
16597 if (max_mp == NULL
16598 && mp->max_address > max_address)
16599 max_mp = mp;
16601 /* If we are inserting an 8-bytes aligned quantity and
16602 we have not already found an insertion point, then
16603 make sure that all such 8-byte aligned quantities are
16604 placed at the start of the pool. */
16605 if (ARM_DOUBLEWORD_ALIGN
16606 && max_mp == NULL
16607 && fix->fix_size >= 8
16608 && mp->fix_size < 8)
16610 max_mp = mp;
16611 max_address = mp->max_address;
16615 /* The value is not currently in the minipool, so we need to create
16616 a new entry for it. If MAX_MP is NULL, the entry will be put on
16617 the end of the list since the placement is less constrained than
16618 any existing entry. Otherwise, we insert the new fix before
16619 MAX_MP and, if necessary, adjust the constraints on the other
16620 entries. */
16621 mp = XNEW (Mnode);
16622 mp->fix_size = fix->fix_size;
16623 mp->mode = fix->mode;
16624 mp->value = fix->value;
16625 mp->refcount = 1;
16626 /* Not yet required for a backwards ref. */
16627 mp->min_address = -65536;
16629 if (max_mp == NULL)
16631 mp->max_address = max_address;
16632 mp->next = NULL;
16633 mp->prev = minipool_vector_tail;
16635 if (mp->prev == NULL)
16637 minipool_vector_head = mp;
16638 minipool_vector_label = gen_label_rtx ();
16640 else
16641 mp->prev->next = mp;
16643 minipool_vector_tail = mp;
16645 else
16647 if (max_address > max_mp->max_address - mp->fix_size)
16648 mp->max_address = max_mp->max_address - mp->fix_size;
16649 else
16650 mp->max_address = max_address;
16652 mp->next = max_mp;
16653 mp->prev = max_mp->prev;
16654 max_mp->prev = mp;
16655 if (mp->prev != NULL)
16656 mp->prev->next = mp;
16657 else
16658 minipool_vector_head = mp;
16661 /* Save the new entry. */
16662 max_mp = mp;
16664 /* Scan over the preceding entries and adjust their addresses as
16665 required. */
16666 while (mp->prev != NULL
16667 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16669 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16670 mp = mp->prev;
16673 return max_mp;
16676 static Mnode *
16677 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16678 HOST_WIDE_INT min_address)
16680 HOST_WIDE_INT offset;
16682 /* The code below assumes these are different. */
16683 gcc_assert (mp != min_mp);
16685 if (min_mp == NULL)
16687 if (min_address > mp->min_address)
16688 mp->min_address = min_address;
16690 else
16692 /* We will adjust this below if it is too loose. */
16693 mp->min_address = min_address;
16695 /* Unlink MP from its current position. Since min_mp is non-null,
16696 mp->next must be non-null. */
16697 mp->next->prev = mp->prev;
16698 if (mp->prev != NULL)
16699 mp->prev->next = mp->next;
16700 else
16701 minipool_vector_head = mp->next;
16703 /* Reinsert it after MIN_MP. */
16704 mp->prev = min_mp;
16705 mp->next = min_mp->next;
16706 min_mp->next = mp;
16707 if (mp->next != NULL)
16708 mp->next->prev = mp;
16709 else
16710 minipool_vector_tail = mp;
16713 min_mp = mp;
16715 offset = 0;
16716 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16718 mp->offset = offset;
16719 if (mp->refcount > 0)
16720 offset += mp->fix_size;
16722 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16723 mp->next->min_address = mp->min_address + mp->fix_size;
16726 return min_mp;
16729 /* Add a constant to the minipool for a backward reference. Returns the
16730 node added or NULL if the constant will not fit in this pool.
16732 Note that the code for insertion for a backwards reference can be
16733 somewhat confusing because the calculated offsets for each fix do
16734 not take into account the size of the pool (which is still under
16735 construction. */
16736 static Mnode *
16737 add_minipool_backward_ref (Mfix *fix)
16739 /* If set, min_mp is the last pool_entry that has a lower constraint
16740 than the one we are trying to add. */
16741 Mnode *min_mp = NULL;
16742 /* This can be negative, since it is only a constraint. */
16743 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16744 Mnode *mp;
16746 /* If we can't reach the current pool from this insn, or if we can't
16747 insert this entry at the end of the pool without pushing other
16748 fixes out of range, then we don't try. This ensures that we
16749 can't fail later on. */
16750 if (min_address >= minipool_barrier->address
16751 || (minipool_vector_tail->min_address + fix->fix_size
16752 >= minipool_barrier->address))
16753 return NULL;
16755 /* Scan the pool to see if a constant with the same value has
16756 already been added. While we are doing this, also note the
16757 location where we must insert the constant if it doesn't already
16758 exist. */
16759 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16761 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16762 && fix->mode == mp->mode
16763 && (!LABEL_P (fix->value)
16764 || (CODE_LABEL_NUMBER (fix->value)
16765 == CODE_LABEL_NUMBER (mp->value)))
16766 && rtx_equal_p (fix->value, mp->value)
16767 /* Check that there is enough slack to move this entry to the
16768 end of the table (this is conservative). */
16769 && (mp->max_address
16770 > (minipool_barrier->address
16771 + minipool_vector_tail->offset
16772 + minipool_vector_tail->fix_size)))
16774 mp->refcount++;
16775 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16778 if (min_mp != NULL)
16779 mp->min_address += fix->fix_size;
16780 else
16782 /* Note the insertion point if necessary. */
16783 if (mp->min_address < min_address)
16785 /* For now, we do not allow the insertion of 8-byte alignment
16786 requiring nodes anywhere but at the start of the pool. */
16787 if (ARM_DOUBLEWORD_ALIGN
16788 && fix->fix_size >= 8 && mp->fix_size < 8)
16789 return NULL;
16790 else
16791 min_mp = mp;
16793 else if (mp->max_address
16794 < minipool_barrier->address + mp->offset + fix->fix_size)
16796 /* Inserting before this entry would push the fix beyond
16797 its maximum address (which can happen if we have
16798 re-located a forwards fix); force the new fix to come
16799 after it. */
16800 if (ARM_DOUBLEWORD_ALIGN
16801 && fix->fix_size >= 8 && mp->fix_size < 8)
16802 return NULL;
16803 else
16805 min_mp = mp;
16806 min_address = mp->min_address + fix->fix_size;
16809 /* Do not insert a non-8-byte aligned quantity before 8-byte
16810 aligned quantities. */
16811 else if (ARM_DOUBLEWORD_ALIGN
16812 && fix->fix_size < 8
16813 && mp->fix_size >= 8)
16815 min_mp = mp;
16816 min_address = mp->min_address + fix->fix_size;
16821 /* We need to create a new entry. */
16822 mp = XNEW (Mnode);
16823 mp->fix_size = fix->fix_size;
16824 mp->mode = fix->mode;
16825 mp->value = fix->value;
16826 mp->refcount = 1;
16827 mp->max_address = minipool_barrier->address + 65536;
16829 mp->min_address = min_address;
16831 if (min_mp == NULL)
16833 mp->prev = NULL;
16834 mp->next = minipool_vector_head;
16836 if (mp->next == NULL)
16838 minipool_vector_tail = mp;
16839 minipool_vector_label = gen_label_rtx ();
16841 else
16842 mp->next->prev = mp;
16844 minipool_vector_head = mp;
16846 else
16848 mp->next = min_mp->next;
16849 mp->prev = min_mp;
16850 min_mp->next = mp;
16852 if (mp->next != NULL)
16853 mp->next->prev = mp;
16854 else
16855 minipool_vector_tail = mp;
16858 /* Save the new entry. */
16859 min_mp = mp;
16861 if (mp->prev)
16862 mp = mp->prev;
16863 else
16864 mp->offset = 0;
16866 /* Scan over the following entries and adjust their offsets. */
16867 while (mp->next != NULL)
16869 if (mp->next->min_address < mp->min_address + mp->fix_size)
16870 mp->next->min_address = mp->min_address + mp->fix_size;
16872 if (mp->refcount)
16873 mp->next->offset = mp->offset + mp->fix_size;
16874 else
16875 mp->next->offset = mp->offset;
16877 mp = mp->next;
16880 return min_mp;
16883 static void
16884 assign_minipool_offsets (Mfix *barrier)
16886 HOST_WIDE_INT offset = 0;
16887 Mnode *mp;
16889 minipool_barrier = barrier;
16891 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16893 mp->offset = offset;
16895 if (mp->refcount > 0)
16896 offset += mp->fix_size;
16900 /* Output the literal table */
16901 static void
16902 dump_minipool (rtx_insn *scan)
16904 Mnode * mp;
16905 Mnode * nmp;
16906 int align64 = 0;
16908 if (ARM_DOUBLEWORD_ALIGN)
16909 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16910 if (mp->refcount > 0 && mp->fix_size >= 8)
16912 align64 = 1;
16913 break;
16916 if (dump_file)
16917 fprintf (dump_file,
16918 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16919 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16921 scan = emit_label_after (gen_label_rtx (), scan);
16922 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16923 scan = emit_label_after (minipool_vector_label, scan);
16925 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16927 if (mp->refcount > 0)
16929 if (dump_file)
16931 fprintf (dump_file,
16932 ";; Offset %u, min %ld, max %ld ",
16933 (unsigned) mp->offset, (unsigned long) mp->min_address,
16934 (unsigned long) mp->max_address);
16935 arm_print_value (dump_file, mp->value);
16936 fputc ('\n', dump_file);
16939 switch (GET_MODE_SIZE (mp->mode))
16941 #ifdef HAVE_consttable_1
16942 case 1:
16943 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16944 break;
16946 #endif
16947 #ifdef HAVE_consttable_2
16948 case 2:
16949 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16950 break;
16952 #endif
16953 #ifdef HAVE_consttable_4
16954 case 4:
16955 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16956 break;
16958 #endif
16959 #ifdef HAVE_consttable_8
16960 case 8:
16961 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16962 break;
16964 #endif
16965 #ifdef HAVE_consttable_16
16966 case 16:
16967 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16968 break;
16970 #endif
16971 default:
16972 gcc_unreachable ();
16976 nmp = mp->next;
16977 free (mp);
16980 minipool_vector_head = minipool_vector_tail = NULL;
16981 scan = emit_insn_after (gen_consttable_end (), scan);
16982 scan = emit_barrier_after (scan);
16985 /* Return the cost of forcibly inserting a barrier after INSN. */
16986 static int
16987 arm_barrier_cost (rtx_insn *insn)
16989 /* Basing the location of the pool on the loop depth is preferable,
16990 but at the moment, the basic block information seems to be
16991 corrupt by this stage of the compilation. */
16992 int base_cost = 50;
16993 rtx_insn *next = next_nonnote_insn (insn);
16995 if (next != NULL && LABEL_P (next))
16996 base_cost -= 20;
16998 switch (GET_CODE (insn))
17000 case CODE_LABEL:
17001 /* It will always be better to place the table before the label, rather
17002 than after it. */
17003 return 50;
17005 case INSN:
17006 case CALL_INSN:
17007 return base_cost;
17009 case JUMP_INSN:
17010 return base_cost - 10;
17012 default:
17013 return base_cost + 10;
17017 /* Find the best place in the insn stream in the range
17018 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
17019 Create the barrier by inserting a jump and add a new fix entry for
17020 it. */
17021 static Mfix *
17022 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
17024 HOST_WIDE_INT count = 0;
17025 rtx_barrier *barrier;
17026 rtx_insn *from = fix->insn;
17027 /* The instruction after which we will insert the jump. */
17028 rtx_insn *selected = NULL;
17029 int selected_cost;
17030 /* The address at which the jump instruction will be placed. */
17031 HOST_WIDE_INT selected_address;
17032 Mfix * new_fix;
17033 HOST_WIDE_INT max_count = max_address - fix->address;
17034 rtx_code_label *label = gen_label_rtx ();
17036 selected_cost = arm_barrier_cost (from);
17037 selected_address = fix->address;
17039 while (from && count < max_count)
17041 rtx_jump_table_data *tmp;
17042 int new_cost;
17044 /* This code shouldn't have been called if there was a natural barrier
17045 within range. */
17046 gcc_assert (!BARRIER_P (from));
17048 /* Count the length of this insn. This must stay in sync with the
17049 code that pushes minipool fixes. */
17050 if (LABEL_P (from))
17051 count += get_label_padding (from);
17052 else
17053 count += get_attr_length (from);
17055 /* If there is a jump table, add its length. */
17056 if (tablejump_p (from, NULL, &tmp))
17058 count += get_jump_table_size (tmp);
17060 /* Jump tables aren't in a basic block, so base the cost on
17061 the dispatch insn. If we select this location, we will
17062 still put the pool after the table. */
17063 new_cost = arm_barrier_cost (from);
17065 if (count < max_count
17066 && (!selected || new_cost <= selected_cost))
17068 selected = tmp;
17069 selected_cost = new_cost;
17070 selected_address = fix->address + count;
17073 /* Continue after the dispatch table. */
17074 from = NEXT_INSN (tmp);
17075 continue;
17078 new_cost = arm_barrier_cost (from);
17080 if (count < max_count
17081 && (!selected || new_cost <= selected_cost))
17083 selected = from;
17084 selected_cost = new_cost;
17085 selected_address = fix->address + count;
17088 from = NEXT_INSN (from);
17091 /* Make sure that we found a place to insert the jump. */
17092 gcc_assert (selected);
17094 /* Make sure we do not split a call and its corresponding
17095 CALL_ARG_LOCATION note. */
17096 if (CALL_P (selected))
17098 rtx_insn *next = NEXT_INSN (selected);
17099 if (next && NOTE_P (next)
17100 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
17101 selected = next;
17104 /* Create a new JUMP_INSN that branches around a barrier. */
17105 from = emit_jump_insn_after (gen_jump (label), selected);
17106 JUMP_LABEL (from) = label;
17107 barrier = emit_barrier_after (from);
17108 emit_label_after (label, barrier);
17110 /* Create a minipool barrier entry for the new barrier. */
17111 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
17112 new_fix->insn = barrier;
17113 new_fix->address = selected_address;
17114 new_fix->next = fix->next;
17115 fix->next = new_fix;
17117 return new_fix;
17120 /* Record that there is a natural barrier in the insn stream at
17121 ADDRESS. */
17122 static void
17123 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
17125 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17127 fix->insn = insn;
17128 fix->address = address;
17130 fix->next = NULL;
17131 if (minipool_fix_head != NULL)
17132 minipool_fix_tail->next = fix;
17133 else
17134 minipool_fix_head = fix;
17136 minipool_fix_tail = fix;
17139 /* Record INSN, which will need fixing up to load a value from the
17140 minipool. ADDRESS is the offset of the insn since the start of the
17141 function; LOC is a pointer to the part of the insn which requires
17142 fixing; VALUE is the constant that must be loaded, which is of type
17143 MODE. */
17144 static void
17145 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
17146 machine_mode mode, rtx value)
17148 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
17150 fix->insn = insn;
17151 fix->address = address;
17152 fix->loc = loc;
17153 fix->mode = mode;
17154 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
17155 fix->value = value;
17156 fix->forwards = get_attr_pool_range (insn);
17157 fix->backwards = get_attr_neg_pool_range (insn);
17158 fix->minipool = NULL;
17160 /* If an insn doesn't have a range defined for it, then it isn't
17161 expecting to be reworked by this code. Better to stop now than
17162 to generate duff assembly code. */
17163 gcc_assert (fix->forwards || fix->backwards);
17165 /* If an entry requires 8-byte alignment then assume all constant pools
17166 require 4 bytes of padding. Trying to do this later on a per-pool
17167 basis is awkward because existing pool entries have to be modified. */
17168 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
17169 minipool_pad = 4;
17171 if (dump_file)
17173 fprintf (dump_file,
17174 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
17175 GET_MODE_NAME (mode),
17176 INSN_UID (insn), (unsigned long) address,
17177 -1 * (long)fix->backwards, (long)fix->forwards);
17178 arm_print_value (dump_file, fix->value);
17179 fprintf (dump_file, "\n");
17182 /* Add it to the chain of fixes. */
17183 fix->next = NULL;
17185 if (minipool_fix_head != NULL)
17186 minipool_fix_tail->next = fix;
17187 else
17188 minipool_fix_head = fix;
17190 minipool_fix_tail = fix;
17193 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
17194 Returns the number of insns needed, or 99 if we always want to synthesize
17195 the value. */
17197 arm_max_const_double_inline_cost ()
17199 /* Let the value get synthesized to avoid the use of literal pools. */
17200 if (arm_disable_literal_pool)
17201 return 99;
17203 return ((optimize_size || arm_ld_sched) ? 3 : 4);
17206 /* Return the cost of synthesizing a 64-bit constant VAL inline.
17207 Returns the number of insns needed, or 99 if we don't know how to
17208 do it. */
17210 arm_const_double_inline_cost (rtx val)
17212 rtx lowpart, highpart;
17213 machine_mode mode;
17215 mode = GET_MODE (val);
17217 if (mode == VOIDmode)
17218 mode = DImode;
17220 gcc_assert (GET_MODE_SIZE (mode) == 8);
17222 lowpart = gen_lowpart (SImode, val);
17223 highpart = gen_highpart_mode (SImode, mode, val);
17225 gcc_assert (CONST_INT_P (lowpart));
17226 gcc_assert (CONST_INT_P (highpart));
17228 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
17229 NULL_RTX, NULL_RTX, 0, 0)
17230 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
17231 NULL_RTX, NULL_RTX, 0, 0));
17234 /* Cost of loading a SImode constant. */
17235 static inline int
17236 arm_const_inline_cost (enum rtx_code code, rtx val)
17238 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
17239 NULL_RTX, NULL_RTX, 1, 0);
17242 /* Return true if it is worthwhile to split a 64-bit constant into two
17243 32-bit operations. This is the case if optimizing for size, or
17244 if we have load delay slots, or if one 32-bit part can be done with
17245 a single data operation. */
17246 bool
17247 arm_const_double_by_parts (rtx val)
17249 machine_mode mode = GET_MODE (val);
17250 rtx part;
17252 if (optimize_size || arm_ld_sched)
17253 return true;
17255 if (mode == VOIDmode)
17256 mode = DImode;
17258 part = gen_highpart_mode (SImode, mode, val);
17260 gcc_assert (CONST_INT_P (part));
17262 if (const_ok_for_arm (INTVAL (part))
17263 || const_ok_for_arm (~INTVAL (part)))
17264 return true;
17266 part = gen_lowpart (SImode, val);
17268 gcc_assert (CONST_INT_P (part));
17270 if (const_ok_for_arm (INTVAL (part))
17271 || const_ok_for_arm (~INTVAL (part)))
17272 return true;
17274 return false;
17277 /* Return true if it is possible to inline both the high and low parts
17278 of a 64-bit constant into 32-bit data processing instructions. */
17279 bool
17280 arm_const_double_by_immediates (rtx val)
17282 machine_mode mode = GET_MODE (val);
17283 rtx part;
17285 if (mode == VOIDmode)
17286 mode = DImode;
17288 part = gen_highpart_mode (SImode, mode, val);
17290 gcc_assert (CONST_INT_P (part));
17292 if (!const_ok_for_arm (INTVAL (part)))
17293 return false;
17295 part = gen_lowpart (SImode, val);
17297 gcc_assert (CONST_INT_P (part));
17299 if (!const_ok_for_arm (INTVAL (part)))
17300 return false;
17302 return true;
17305 /* Scan INSN and note any of its operands that need fixing.
17306 If DO_PUSHES is false we do not actually push any of the fixups
17307 needed. */
17308 static void
17309 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17311 int opno;
17313 extract_constrain_insn (insn);
17315 if (recog_data.n_alternatives == 0)
17316 return;
17318 /* Fill in recog_op_alt with information about the constraints of
17319 this insn. */
17320 preprocess_constraints (insn);
17322 const operand_alternative *op_alt = which_op_alt ();
17323 for (opno = 0; opno < recog_data.n_operands; opno++)
17325 /* Things we need to fix can only occur in inputs. */
17326 if (recog_data.operand_type[opno] != OP_IN)
17327 continue;
17329 /* If this alternative is a memory reference, then any mention
17330 of constants in this alternative is really to fool reload
17331 into allowing us to accept one there. We need to fix them up
17332 now so that we output the right code. */
17333 if (op_alt[opno].memory_ok)
17335 rtx op = recog_data.operand[opno];
17337 if (CONSTANT_P (op))
17339 if (do_pushes)
17340 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17341 recog_data.operand_mode[opno], op);
17343 else if (MEM_P (op)
17344 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17345 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17347 if (do_pushes)
17349 rtx cop = avoid_constant_pool_reference (op);
17351 /* Casting the address of something to a mode narrower
17352 than a word can cause avoid_constant_pool_reference()
17353 to return the pool reference itself. That's no good to
17354 us here. Lets just hope that we can use the
17355 constant pool value directly. */
17356 if (op == cop)
17357 cop = get_pool_constant (XEXP (op, 0));
17359 push_minipool_fix (insn, address,
17360 recog_data.operand_loc[opno],
17361 recog_data.operand_mode[opno], cop);
17368 return;
17371 /* Rewrite move insn into subtract of 0 if the condition codes will
17372 be useful in next conditional jump insn. */
17374 static void
17375 thumb1_reorg (void)
17377 basic_block bb;
17379 FOR_EACH_BB_FN (bb, cfun)
17381 rtx dest, src;
17382 rtx cmp, op0, op1, set = NULL;
17383 rtx_insn *prev, *insn = BB_END (bb);
17384 bool insn_clobbered = false;
17386 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17387 insn = PREV_INSN (insn);
17389 /* Find the last cbranchsi4_insn in basic block BB. */
17390 if (insn == BB_HEAD (bb)
17391 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17392 continue;
17394 /* Get the register with which we are comparing. */
17395 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17396 op0 = XEXP (cmp, 0);
17397 op1 = XEXP (cmp, 1);
17399 /* Check that comparison is against ZERO. */
17400 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17401 continue;
17403 /* Find the first flag setting insn before INSN in basic block BB. */
17404 gcc_assert (insn != BB_HEAD (bb));
17405 for (prev = PREV_INSN (insn);
17406 (!insn_clobbered
17407 && prev != BB_HEAD (bb)
17408 && (NOTE_P (prev)
17409 || DEBUG_INSN_P (prev)
17410 || ((set = single_set (prev)) != NULL
17411 && get_attr_conds (prev) == CONDS_NOCOND)));
17412 prev = PREV_INSN (prev))
17414 if (reg_set_p (op0, prev))
17415 insn_clobbered = true;
17418 /* Skip if op0 is clobbered by insn other than prev. */
17419 if (insn_clobbered)
17420 continue;
17422 if (!set)
17423 continue;
17425 dest = SET_DEST (set);
17426 src = SET_SRC (set);
17427 if (!low_register_operand (dest, SImode)
17428 || !low_register_operand (src, SImode))
17429 continue;
17431 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17432 in INSN. Both src and dest of the move insn are checked. */
17433 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17435 dest = copy_rtx (dest);
17436 src = copy_rtx (src);
17437 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17438 PATTERN (prev) = gen_rtx_SET (dest, src);
17439 INSN_CODE (prev) = -1;
17440 /* Set test register in INSN to dest. */
17441 XEXP (cmp, 0) = copy_rtx (dest);
17442 INSN_CODE (insn) = -1;
17447 /* Convert instructions to their cc-clobbering variant if possible, since
17448 that allows us to use smaller encodings. */
17450 static void
17451 thumb2_reorg (void)
17453 basic_block bb;
17454 regset_head live;
17456 INIT_REG_SET (&live);
17458 /* We are freeing block_for_insn in the toplev to keep compatibility
17459 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17460 compute_bb_for_insn ();
17461 df_analyze ();
17463 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17465 FOR_EACH_BB_FN (bb, cfun)
17467 if ((current_tune->disparage_flag_setting_t16_encodings
17468 == tune_params::DISPARAGE_FLAGS_ALL)
17469 && optimize_bb_for_speed_p (bb))
17470 continue;
17472 rtx_insn *insn;
17473 Convert_Action action = SKIP;
17474 Convert_Action action_for_partial_flag_setting
17475 = ((current_tune->disparage_flag_setting_t16_encodings
17476 != tune_params::DISPARAGE_FLAGS_NEITHER)
17477 && optimize_bb_for_speed_p (bb))
17478 ? SKIP : CONV;
17480 COPY_REG_SET (&live, DF_LR_OUT (bb));
17481 df_simulate_initialize_backwards (bb, &live);
17482 FOR_BB_INSNS_REVERSE (bb, insn)
17484 if (NONJUMP_INSN_P (insn)
17485 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17486 && GET_CODE (PATTERN (insn)) == SET)
17488 action = SKIP;
17489 rtx pat = PATTERN (insn);
17490 rtx dst = XEXP (pat, 0);
17491 rtx src = XEXP (pat, 1);
17492 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17494 if (UNARY_P (src) || BINARY_P (src))
17495 op0 = XEXP (src, 0);
17497 if (BINARY_P (src))
17498 op1 = XEXP (src, 1);
17500 if (low_register_operand (dst, SImode))
17502 switch (GET_CODE (src))
17504 case PLUS:
17505 /* Adding two registers and storing the result
17506 in the first source is already a 16-bit
17507 operation. */
17508 if (rtx_equal_p (dst, op0)
17509 && register_operand (op1, SImode))
17510 break;
17512 if (low_register_operand (op0, SImode))
17514 /* ADDS <Rd>,<Rn>,<Rm> */
17515 if (low_register_operand (op1, SImode))
17516 action = CONV;
17517 /* ADDS <Rdn>,#<imm8> */
17518 /* SUBS <Rdn>,#<imm8> */
17519 else if (rtx_equal_p (dst, op0)
17520 && CONST_INT_P (op1)
17521 && IN_RANGE (INTVAL (op1), -255, 255))
17522 action = CONV;
17523 /* ADDS <Rd>,<Rn>,#<imm3> */
17524 /* SUBS <Rd>,<Rn>,#<imm3> */
17525 else if (CONST_INT_P (op1)
17526 && IN_RANGE (INTVAL (op1), -7, 7))
17527 action = CONV;
17529 /* ADCS <Rd>, <Rn> */
17530 else if (GET_CODE (XEXP (src, 0)) == PLUS
17531 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17532 && low_register_operand (XEXP (XEXP (src, 0), 1),
17533 SImode)
17534 && COMPARISON_P (op1)
17535 && cc_register (XEXP (op1, 0), VOIDmode)
17536 && maybe_get_arm_condition_code (op1) == ARM_CS
17537 && XEXP (op1, 1) == const0_rtx)
17538 action = CONV;
17539 break;
17541 case MINUS:
17542 /* RSBS <Rd>,<Rn>,#0
17543 Not handled here: see NEG below. */
17544 /* SUBS <Rd>,<Rn>,#<imm3>
17545 SUBS <Rdn>,#<imm8>
17546 Not handled here: see PLUS above. */
17547 /* SUBS <Rd>,<Rn>,<Rm> */
17548 if (low_register_operand (op0, SImode)
17549 && low_register_operand (op1, SImode))
17550 action = CONV;
17551 break;
17553 case MULT:
17554 /* MULS <Rdm>,<Rn>,<Rdm>
17555 As an exception to the rule, this is only used
17556 when optimizing for size since MULS is slow on all
17557 known implementations. We do not even want to use
17558 MULS in cold code, if optimizing for speed, so we
17559 test the global flag here. */
17560 if (!optimize_size)
17561 break;
17562 /* Fall through. */
17563 case AND:
17564 case IOR:
17565 case XOR:
17566 /* ANDS <Rdn>,<Rm> */
17567 if (rtx_equal_p (dst, op0)
17568 && low_register_operand (op1, SImode))
17569 action = action_for_partial_flag_setting;
17570 else if (rtx_equal_p (dst, op1)
17571 && low_register_operand (op0, SImode))
17572 action = action_for_partial_flag_setting == SKIP
17573 ? SKIP : SWAP_CONV;
17574 break;
17576 case ASHIFTRT:
17577 case ASHIFT:
17578 case LSHIFTRT:
17579 /* ASRS <Rdn>,<Rm> */
17580 /* LSRS <Rdn>,<Rm> */
17581 /* LSLS <Rdn>,<Rm> */
17582 if (rtx_equal_p (dst, op0)
17583 && low_register_operand (op1, SImode))
17584 action = action_for_partial_flag_setting;
17585 /* ASRS <Rd>,<Rm>,#<imm5> */
17586 /* LSRS <Rd>,<Rm>,#<imm5> */
17587 /* LSLS <Rd>,<Rm>,#<imm5> */
17588 else if (low_register_operand (op0, SImode)
17589 && CONST_INT_P (op1)
17590 && IN_RANGE (INTVAL (op1), 0, 31))
17591 action = action_for_partial_flag_setting;
17592 break;
17594 case ROTATERT:
17595 /* RORS <Rdn>,<Rm> */
17596 if (rtx_equal_p (dst, op0)
17597 && low_register_operand (op1, SImode))
17598 action = action_for_partial_flag_setting;
17599 break;
17601 case NOT:
17602 /* MVNS <Rd>,<Rm> */
17603 if (low_register_operand (op0, SImode))
17604 action = action_for_partial_flag_setting;
17605 break;
17607 case NEG:
17608 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17609 if (low_register_operand (op0, SImode))
17610 action = CONV;
17611 break;
17613 case CONST_INT:
17614 /* MOVS <Rd>,#<imm8> */
17615 if (CONST_INT_P (src)
17616 && IN_RANGE (INTVAL (src), 0, 255))
17617 action = action_for_partial_flag_setting;
17618 break;
17620 case REG:
17621 /* MOVS and MOV<c> with registers have different
17622 encodings, so are not relevant here. */
17623 break;
17625 default:
17626 break;
17630 if (action != SKIP)
17632 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17633 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17634 rtvec vec;
17636 if (action == SWAP_CONV)
17638 src = copy_rtx (src);
17639 XEXP (src, 0) = op1;
17640 XEXP (src, 1) = op0;
17641 pat = gen_rtx_SET (dst, src);
17642 vec = gen_rtvec (2, pat, clobber);
17644 else /* action == CONV */
17645 vec = gen_rtvec (2, pat, clobber);
17647 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17648 INSN_CODE (insn) = -1;
17652 if (NONDEBUG_INSN_P (insn))
17653 df_simulate_one_insn_backwards (bb, insn, &live);
17657 CLEAR_REG_SET (&live);
17660 /* Gcc puts the pool in the wrong place for ARM, since we can only
17661 load addresses a limited distance around the pc. We do some
17662 special munging to move the constant pool values to the correct
17663 point in the code. */
17664 static void
17665 arm_reorg (void)
17667 rtx_insn *insn;
17668 HOST_WIDE_INT address = 0;
17669 Mfix * fix;
17671 if (TARGET_THUMB1)
17672 thumb1_reorg ();
17673 else if (TARGET_THUMB2)
17674 thumb2_reorg ();
17676 /* Ensure all insns that must be split have been split at this point.
17677 Otherwise, the pool placement code below may compute incorrect
17678 insn lengths. Note that when optimizing, all insns have already
17679 been split at this point. */
17680 if (!optimize)
17681 split_all_insns_noflow ();
17683 minipool_fix_head = minipool_fix_tail = NULL;
17685 /* The first insn must always be a note, or the code below won't
17686 scan it properly. */
17687 insn = get_insns ();
17688 gcc_assert (NOTE_P (insn));
17689 minipool_pad = 0;
17691 /* Scan all the insns and record the operands that will need fixing. */
17692 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17694 if (BARRIER_P (insn))
17695 push_minipool_barrier (insn, address);
17696 else if (INSN_P (insn))
17698 rtx_jump_table_data *table;
17700 note_invalid_constants (insn, address, true);
17701 address += get_attr_length (insn);
17703 /* If the insn is a vector jump, add the size of the table
17704 and skip the table. */
17705 if (tablejump_p (insn, NULL, &table))
17707 address += get_jump_table_size (table);
17708 insn = table;
17711 else if (LABEL_P (insn))
17712 /* Add the worst-case padding due to alignment. We don't add
17713 the _current_ padding because the minipool insertions
17714 themselves might change it. */
17715 address += get_label_padding (insn);
17718 fix = minipool_fix_head;
17720 /* Now scan the fixups and perform the required changes. */
17721 while (fix)
17723 Mfix * ftmp;
17724 Mfix * fdel;
17725 Mfix * last_added_fix;
17726 Mfix * last_barrier = NULL;
17727 Mfix * this_fix;
17729 /* Skip any further barriers before the next fix. */
17730 while (fix && BARRIER_P (fix->insn))
17731 fix = fix->next;
17733 /* No more fixes. */
17734 if (fix == NULL)
17735 break;
17737 last_added_fix = NULL;
17739 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17741 if (BARRIER_P (ftmp->insn))
17743 if (ftmp->address >= minipool_vector_head->max_address)
17744 break;
17746 last_barrier = ftmp;
17748 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17749 break;
17751 last_added_fix = ftmp; /* Keep track of the last fix added. */
17754 /* If we found a barrier, drop back to that; any fixes that we
17755 could have reached but come after the barrier will now go in
17756 the next mini-pool. */
17757 if (last_barrier != NULL)
17759 /* Reduce the refcount for those fixes that won't go into this
17760 pool after all. */
17761 for (fdel = last_barrier->next;
17762 fdel && fdel != ftmp;
17763 fdel = fdel->next)
17765 fdel->minipool->refcount--;
17766 fdel->minipool = NULL;
17769 ftmp = last_barrier;
17771 else
17773 /* ftmp is first fix that we can't fit into this pool and
17774 there no natural barriers that we could use. Insert a
17775 new barrier in the code somewhere between the previous
17776 fix and this one, and arrange to jump around it. */
17777 HOST_WIDE_INT max_address;
17779 /* The last item on the list of fixes must be a barrier, so
17780 we can never run off the end of the list of fixes without
17781 last_barrier being set. */
17782 gcc_assert (ftmp);
17784 max_address = minipool_vector_head->max_address;
17785 /* Check that there isn't another fix that is in range that
17786 we couldn't fit into this pool because the pool was
17787 already too large: we need to put the pool before such an
17788 instruction. The pool itself may come just after the
17789 fix because create_fix_barrier also allows space for a
17790 jump instruction. */
17791 if (ftmp->address < max_address)
17792 max_address = ftmp->address + 1;
17794 last_barrier = create_fix_barrier (last_added_fix, max_address);
17797 assign_minipool_offsets (last_barrier);
17799 while (ftmp)
17801 if (!BARRIER_P (ftmp->insn)
17802 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17803 == NULL))
17804 break;
17806 ftmp = ftmp->next;
17809 /* Scan over the fixes we have identified for this pool, fixing them
17810 up and adding the constants to the pool itself. */
17811 for (this_fix = fix; this_fix && ftmp != this_fix;
17812 this_fix = this_fix->next)
17813 if (!BARRIER_P (this_fix->insn))
17815 rtx addr
17816 = plus_constant (Pmode,
17817 gen_rtx_LABEL_REF (VOIDmode,
17818 minipool_vector_label),
17819 this_fix->minipool->offset);
17820 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17823 dump_minipool (last_barrier->insn);
17824 fix = ftmp;
17827 /* From now on we must synthesize any constants that we can't handle
17828 directly. This can happen if the RTL gets split during final
17829 instruction generation. */
17830 cfun->machine->after_arm_reorg = 1;
17832 /* Free the minipool memory. */
17833 obstack_free (&minipool_obstack, minipool_startobj);
17836 /* Routines to output assembly language. */
17838 /* Return string representation of passed in real value. */
17839 static const char *
17840 fp_const_from_val (REAL_VALUE_TYPE *r)
17842 if (!fp_consts_inited)
17843 init_fp_table ();
17845 gcc_assert (real_equal (r, &value_fp0));
17846 return "0";
17849 /* OPERANDS[0] is the entire list of insns that constitute pop,
17850 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17851 is in the list, UPDATE is true iff the list contains explicit
17852 update of base register. */
17853 void
17854 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17855 bool update)
17857 int i;
17858 char pattern[100];
17859 int offset;
17860 const char *conditional;
17861 int num_saves = XVECLEN (operands[0], 0);
17862 unsigned int regno;
17863 unsigned int regno_base = REGNO (operands[1]);
17864 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17866 offset = 0;
17867 offset += update ? 1 : 0;
17868 offset += return_pc ? 1 : 0;
17870 /* Is the base register in the list? */
17871 for (i = offset; i < num_saves; i++)
17873 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17874 /* If SP is in the list, then the base register must be SP. */
17875 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17876 /* If base register is in the list, there must be no explicit update. */
17877 if (regno == regno_base)
17878 gcc_assert (!update);
17881 conditional = reverse ? "%?%D0" : "%?%d0";
17882 /* Can't use POP if returning from an interrupt. */
17883 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17884 sprintf (pattern, "pop%s\t{", conditional);
17885 else
17887 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17888 It's just a convention, their semantics are identical. */
17889 if (regno_base == SP_REGNUM)
17890 sprintf (pattern, "ldmfd%s\t", conditional);
17891 else if (update)
17892 sprintf (pattern, "ldmia%s\t", conditional);
17893 else
17894 sprintf (pattern, "ldm%s\t", conditional);
17896 strcat (pattern, reg_names[regno_base]);
17897 if (update)
17898 strcat (pattern, "!, {");
17899 else
17900 strcat (pattern, ", {");
17903 /* Output the first destination register. */
17904 strcat (pattern,
17905 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17907 /* Output the rest of the destination registers. */
17908 for (i = offset + 1; i < num_saves; i++)
17910 strcat (pattern, ", ");
17911 strcat (pattern,
17912 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17915 strcat (pattern, "}");
17917 if (interrupt_p && return_pc)
17918 strcat (pattern, "^");
17920 output_asm_insn (pattern, &cond);
17924 /* Output the assembly for a store multiple. */
17926 const char *
17927 vfp_output_vstmd (rtx * operands)
17929 char pattern[100];
17930 int p;
17931 int base;
17932 int i;
17933 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17934 ? XEXP (operands[0], 0)
17935 : XEXP (XEXP (operands[0], 0), 0);
17936 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17938 if (push_p)
17939 strcpy (pattern, "vpush%?.64\t{%P1");
17940 else
17941 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17943 p = strlen (pattern);
17945 gcc_assert (REG_P (operands[1]));
17947 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17948 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17950 p += sprintf (&pattern[p], ", d%d", base + i);
17952 strcpy (&pattern[p], "}");
17954 output_asm_insn (pattern, operands);
17955 return "";
17959 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17960 number of bytes pushed. */
17962 static int
17963 vfp_emit_fstmd (int base_reg, int count)
17965 rtx par;
17966 rtx dwarf;
17967 rtx tmp, reg;
17968 int i;
17970 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17971 register pairs are stored by a store multiple insn. We avoid this
17972 by pushing an extra pair. */
17973 if (count == 2 && !arm_arch6)
17975 if (base_reg == LAST_VFP_REGNUM - 3)
17976 base_reg -= 2;
17977 count++;
17980 /* FSTMD may not store more than 16 doubleword registers at once. Split
17981 larger stores into multiple parts (up to a maximum of two, in
17982 practice). */
17983 if (count > 16)
17985 int saved;
17986 /* NOTE: base_reg is an internal register number, so each D register
17987 counts as 2. */
17988 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17989 saved += vfp_emit_fstmd (base_reg, 16);
17990 return saved;
17993 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17994 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17996 reg = gen_rtx_REG (DFmode, base_reg);
17997 base_reg += 2;
17999 XVECEXP (par, 0, 0)
18000 = gen_rtx_SET (gen_frame_mem
18001 (BLKmode,
18002 gen_rtx_PRE_MODIFY (Pmode,
18003 stack_pointer_rtx,
18004 plus_constant
18005 (Pmode, stack_pointer_rtx,
18006 - (count * 8)))
18008 gen_rtx_UNSPEC (BLKmode,
18009 gen_rtvec (1, reg),
18010 UNSPEC_PUSH_MULT));
18012 tmp = gen_rtx_SET (stack_pointer_rtx,
18013 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18014 RTX_FRAME_RELATED_P (tmp) = 1;
18015 XVECEXP (dwarf, 0, 0) = tmp;
18017 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18018 RTX_FRAME_RELATED_P (tmp) = 1;
18019 XVECEXP (dwarf, 0, 1) = tmp;
18021 for (i = 1; i < count; i++)
18023 reg = gen_rtx_REG (DFmode, base_reg);
18024 base_reg += 2;
18025 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18027 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18028 plus_constant (Pmode,
18029 stack_pointer_rtx,
18030 i * 8)),
18031 reg);
18032 RTX_FRAME_RELATED_P (tmp) = 1;
18033 XVECEXP (dwarf, 0, i + 1) = tmp;
18036 par = emit_insn (par);
18037 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18038 RTX_FRAME_RELATED_P (par) = 1;
18040 return count * 8;
18043 /* Emit a call instruction with pattern PAT. ADDR is the address of
18044 the call target. */
18046 void
18047 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18049 rtx insn;
18051 insn = emit_call_insn (pat);
18053 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18054 If the call might use such an entry, add a use of the PIC register
18055 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18056 if (TARGET_VXWORKS_RTP
18057 && flag_pic
18058 && !sibcall
18059 && GET_CODE (addr) == SYMBOL_REF
18060 && (SYMBOL_REF_DECL (addr)
18061 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18062 : !SYMBOL_REF_LOCAL_P (addr)))
18064 require_pic_register ();
18065 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18068 if (TARGET_AAPCS_BASED)
18070 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18071 linker. We need to add an IP clobber to allow setting
18072 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18073 is not needed since it's a fixed register. */
18074 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18075 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18079 /* Output a 'call' insn. */
18080 const char *
18081 output_call (rtx *operands)
18083 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
18085 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18086 if (REGNO (operands[0]) == LR_REGNUM)
18088 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18089 output_asm_insn ("mov%?\t%0, %|lr", operands);
18092 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18094 if (TARGET_INTERWORK || arm_arch4t)
18095 output_asm_insn ("bx%?\t%0", operands);
18096 else
18097 output_asm_insn ("mov%?\t%|pc, %0", operands);
18099 return "";
18102 /* Output a move from arm registers to arm registers of a long double
18103 OPERANDS[0] is the destination.
18104 OPERANDS[1] is the source. */
18105 const char *
18106 output_mov_long_double_arm_from_arm (rtx *operands)
18108 /* We have to be careful here because the two might overlap. */
18109 int dest_start = REGNO (operands[0]);
18110 int src_start = REGNO (operands[1]);
18111 rtx ops[2];
18112 int i;
18114 if (dest_start < src_start)
18116 for (i = 0; i < 3; i++)
18118 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18119 ops[1] = gen_rtx_REG (SImode, src_start + i);
18120 output_asm_insn ("mov%?\t%0, %1", ops);
18123 else
18125 for (i = 2; i >= 0; i--)
18127 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18128 ops[1] = gen_rtx_REG (SImode, src_start + i);
18129 output_asm_insn ("mov%?\t%0, %1", ops);
18133 return "";
18136 void
18137 arm_emit_movpair (rtx dest, rtx src)
18139 rtx insn;
18141 /* If the src is an immediate, simplify it. */
18142 if (CONST_INT_P (src))
18144 HOST_WIDE_INT val = INTVAL (src);
18145 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18146 if ((val >> 16) & 0x0000ffff)
18148 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18149 GEN_INT (16)),
18150 GEN_INT ((val >> 16) & 0x0000ffff));
18151 insn = get_last_insn ();
18152 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18154 return;
18156 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18157 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18158 insn = get_last_insn ();
18159 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18162 /* Output a move between double words. It must be REG<-MEM
18163 or MEM<-REG. */
18164 const char *
18165 output_move_double (rtx *operands, bool emit, int *count)
18167 enum rtx_code code0 = GET_CODE (operands[0]);
18168 enum rtx_code code1 = GET_CODE (operands[1]);
18169 rtx otherops[3];
18170 if (count)
18171 *count = 1;
18173 /* The only case when this might happen is when
18174 you are looking at the length of a DImode instruction
18175 that has an invalid constant in it. */
18176 if (code0 == REG && code1 != MEM)
18178 gcc_assert (!emit);
18179 *count = 2;
18180 return "";
18183 if (code0 == REG)
18185 unsigned int reg0 = REGNO (operands[0]);
18187 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18189 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18191 switch (GET_CODE (XEXP (operands[1], 0)))
18193 case REG:
18195 if (emit)
18197 if (TARGET_LDRD
18198 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18199 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18200 else
18201 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18203 break;
18205 case PRE_INC:
18206 gcc_assert (TARGET_LDRD);
18207 if (emit)
18208 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18209 break;
18211 case PRE_DEC:
18212 if (emit)
18214 if (TARGET_LDRD)
18215 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18216 else
18217 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18219 break;
18221 case POST_INC:
18222 if (emit)
18224 if (TARGET_LDRD)
18225 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18226 else
18227 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18229 break;
18231 case POST_DEC:
18232 gcc_assert (TARGET_LDRD);
18233 if (emit)
18234 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18235 break;
18237 case PRE_MODIFY:
18238 case POST_MODIFY:
18239 /* Autoicrement addressing modes should never have overlapping
18240 base and destination registers, and overlapping index registers
18241 are already prohibited, so this doesn't need to worry about
18242 fix_cm3_ldrd. */
18243 otherops[0] = operands[0];
18244 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18245 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18247 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18249 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18251 /* Registers overlap so split out the increment. */
18252 if (emit)
18254 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18255 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18257 if (count)
18258 *count = 2;
18260 else
18262 /* Use a single insn if we can.
18263 FIXME: IWMMXT allows offsets larger than ldrd can
18264 handle, fix these up with a pair of ldr. */
18265 if (TARGET_THUMB2
18266 || !CONST_INT_P (otherops[2])
18267 || (INTVAL (otherops[2]) > -256
18268 && INTVAL (otherops[2]) < 256))
18270 if (emit)
18271 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18273 else
18275 if (emit)
18277 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18278 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18280 if (count)
18281 *count = 2;
18286 else
18288 /* Use a single insn if we can.
18289 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18290 fix these up with a pair of ldr. */
18291 if (TARGET_THUMB2
18292 || !CONST_INT_P (otherops[2])
18293 || (INTVAL (otherops[2]) > -256
18294 && INTVAL (otherops[2]) < 256))
18296 if (emit)
18297 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18299 else
18301 if (emit)
18303 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18304 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18306 if (count)
18307 *count = 2;
18310 break;
18312 case LABEL_REF:
18313 case CONST:
18314 /* We might be able to use ldrd %0, %1 here. However the range is
18315 different to ldr/adr, and it is broken on some ARMv7-M
18316 implementations. */
18317 /* Use the second register of the pair to avoid problematic
18318 overlap. */
18319 otherops[1] = operands[1];
18320 if (emit)
18321 output_asm_insn ("adr%?\t%0, %1", otherops);
18322 operands[1] = otherops[0];
18323 if (emit)
18325 if (TARGET_LDRD)
18326 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18327 else
18328 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18331 if (count)
18332 *count = 2;
18333 break;
18335 /* ??? This needs checking for thumb2. */
18336 default:
18337 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18338 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18340 otherops[0] = operands[0];
18341 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18342 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18344 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18346 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18348 switch ((int) INTVAL (otherops[2]))
18350 case -8:
18351 if (emit)
18352 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18353 return "";
18354 case -4:
18355 if (TARGET_THUMB2)
18356 break;
18357 if (emit)
18358 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18359 return "";
18360 case 4:
18361 if (TARGET_THUMB2)
18362 break;
18363 if (emit)
18364 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18365 return "";
18368 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18369 operands[1] = otherops[0];
18370 if (TARGET_LDRD
18371 && (REG_P (otherops[2])
18372 || TARGET_THUMB2
18373 || (CONST_INT_P (otherops[2])
18374 && INTVAL (otherops[2]) > -256
18375 && INTVAL (otherops[2]) < 256)))
18377 if (reg_overlap_mentioned_p (operands[0],
18378 otherops[2]))
18380 /* Swap base and index registers over to
18381 avoid a conflict. */
18382 std::swap (otherops[1], otherops[2]);
18384 /* If both registers conflict, it will usually
18385 have been fixed by a splitter. */
18386 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18387 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18389 if (emit)
18391 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18392 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18394 if (count)
18395 *count = 2;
18397 else
18399 otherops[0] = operands[0];
18400 if (emit)
18401 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18403 return "";
18406 if (CONST_INT_P (otherops[2]))
18408 if (emit)
18410 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18411 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18412 else
18413 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18416 else
18418 if (emit)
18419 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18422 else
18424 if (emit)
18425 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18428 if (count)
18429 *count = 2;
18431 if (TARGET_LDRD)
18432 return "ldrd%?\t%0, [%1]";
18434 return "ldmia%?\t%1, %M0";
18436 else
18438 otherops[1] = adjust_address (operands[1], SImode, 4);
18439 /* Take care of overlapping base/data reg. */
18440 if (reg_mentioned_p (operands[0], operands[1]))
18442 if (emit)
18444 output_asm_insn ("ldr%?\t%0, %1", otherops);
18445 output_asm_insn ("ldr%?\t%0, %1", operands);
18447 if (count)
18448 *count = 2;
18451 else
18453 if (emit)
18455 output_asm_insn ("ldr%?\t%0, %1", operands);
18456 output_asm_insn ("ldr%?\t%0, %1", otherops);
18458 if (count)
18459 *count = 2;
18464 else
18466 /* Constraints should ensure this. */
18467 gcc_assert (code0 == MEM && code1 == REG);
18468 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18469 || (TARGET_ARM && TARGET_LDRD));
18471 switch (GET_CODE (XEXP (operands[0], 0)))
18473 case REG:
18474 if (emit)
18476 if (TARGET_LDRD)
18477 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18478 else
18479 output_asm_insn ("stm%?\t%m0, %M1", operands);
18481 break;
18483 case PRE_INC:
18484 gcc_assert (TARGET_LDRD);
18485 if (emit)
18486 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18487 break;
18489 case PRE_DEC:
18490 if (emit)
18492 if (TARGET_LDRD)
18493 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18494 else
18495 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18497 break;
18499 case POST_INC:
18500 if (emit)
18502 if (TARGET_LDRD)
18503 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18504 else
18505 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18507 break;
18509 case POST_DEC:
18510 gcc_assert (TARGET_LDRD);
18511 if (emit)
18512 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18513 break;
18515 case PRE_MODIFY:
18516 case POST_MODIFY:
18517 otherops[0] = operands[1];
18518 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18519 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18521 /* IWMMXT allows offsets larger than ldrd can handle,
18522 fix these up with a pair of ldr. */
18523 if (!TARGET_THUMB2
18524 && CONST_INT_P (otherops[2])
18525 && (INTVAL(otherops[2]) <= -256
18526 || INTVAL(otherops[2]) >= 256))
18528 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18530 if (emit)
18532 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18533 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18535 if (count)
18536 *count = 2;
18538 else
18540 if (emit)
18542 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18543 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18545 if (count)
18546 *count = 2;
18549 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18551 if (emit)
18552 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18554 else
18556 if (emit)
18557 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18559 break;
18561 case PLUS:
18562 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18563 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18565 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18567 case -8:
18568 if (emit)
18569 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18570 return "";
18572 case -4:
18573 if (TARGET_THUMB2)
18574 break;
18575 if (emit)
18576 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18577 return "";
18579 case 4:
18580 if (TARGET_THUMB2)
18581 break;
18582 if (emit)
18583 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18584 return "";
18587 if (TARGET_LDRD
18588 && (REG_P (otherops[2])
18589 || TARGET_THUMB2
18590 || (CONST_INT_P (otherops[2])
18591 && INTVAL (otherops[2]) > -256
18592 && INTVAL (otherops[2]) < 256)))
18594 otherops[0] = operands[1];
18595 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18596 if (emit)
18597 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18598 return "";
18600 /* Fall through */
18602 default:
18603 otherops[0] = adjust_address (operands[0], SImode, 4);
18604 otherops[1] = operands[1];
18605 if (emit)
18607 output_asm_insn ("str%?\t%1, %0", operands);
18608 output_asm_insn ("str%?\t%H1, %0", otherops);
18610 if (count)
18611 *count = 2;
18615 return "";
18618 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18619 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18621 const char *
18622 output_move_quad (rtx *operands)
18624 if (REG_P (operands[0]))
18626 /* Load, or reg->reg move. */
18628 if (MEM_P (operands[1]))
18630 switch (GET_CODE (XEXP (operands[1], 0)))
18632 case REG:
18633 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18634 break;
18636 case LABEL_REF:
18637 case CONST:
18638 output_asm_insn ("adr%?\t%0, %1", operands);
18639 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18640 break;
18642 default:
18643 gcc_unreachable ();
18646 else
18648 rtx ops[2];
18649 int dest, src, i;
18651 gcc_assert (REG_P (operands[1]));
18653 dest = REGNO (operands[0]);
18654 src = REGNO (operands[1]);
18656 /* This seems pretty dumb, but hopefully GCC won't try to do it
18657 very often. */
18658 if (dest < src)
18659 for (i = 0; i < 4; i++)
18661 ops[0] = gen_rtx_REG (SImode, dest + i);
18662 ops[1] = gen_rtx_REG (SImode, src + i);
18663 output_asm_insn ("mov%?\t%0, %1", ops);
18665 else
18666 for (i = 3; i >= 0; i--)
18668 ops[0] = gen_rtx_REG (SImode, dest + i);
18669 ops[1] = gen_rtx_REG (SImode, src + i);
18670 output_asm_insn ("mov%?\t%0, %1", ops);
18674 else
18676 gcc_assert (MEM_P (operands[0]));
18677 gcc_assert (REG_P (operands[1]));
18678 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18680 switch (GET_CODE (XEXP (operands[0], 0)))
18682 case REG:
18683 output_asm_insn ("stm%?\t%m0, %M1", operands);
18684 break;
18686 default:
18687 gcc_unreachable ();
18691 return "";
18694 /* Output a VFP load or store instruction. */
18696 const char *
18697 output_move_vfp (rtx *operands)
18699 rtx reg, mem, addr, ops[2];
18700 int load = REG_P (operands[0]);
18701 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18702 int sp = (!TARGET_VFP_FP16INST
18703 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18704 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18705 const char *templ;
18706 char buff[50];
18707 machine_mode mode;
18709 reg = operands[!load];
18710 mem = operands[load];
18712 mode = GET_MODE (reg);
18714 gcc_assert (REG_P (reg));
18715 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18716 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18717 || mode == SFmode
18718 || mode == DFmode
18719 || mode == HImode
18720 || mode == SImode
18721 || mode == DImode
18722 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18723 gcc_assert (MEM_P (mem));
18725 addr = XEXP (mem, 0);
18727 switch (GET_CODE (addr))
18729 case PRE_DEC:
18730 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18731 ops[0] = XEXP (addr, 0);
18732 ops[1] = reg;
18733 break;
18735 case POST_INC:
18736 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18737 ops[0] = XEXP (addr, 0);
18738 ops[1] = reg;
18739 break;
18741 default:
18742 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18743 ops[0] = reg;
18744 ops[1] = mem;
18745 break;
18748 sprintf (buff, templ,
18749 load ? "ld" : "st",
18750 dp ? "64" : sp ? "32" : "16",
18751 dp ? "P" : "",
18752 integer_p ? "\t%@ int" : "");
18753 output_asm_insn (buff, ops);
18755 return "";
18758 /* Output a Neon double-word or quad-word load or store, or a load
18759 or store for larger structure modes.
18761 WARNING: The ordering of elements is weird in big-endian mode,
18762 because the EABI requires that vectors stored in memory appear
18763 as though they were stored by a VSTM, as required by the EABI.
18764 GCC RTL defines element ordering based on in-memory order.
18765 This can be different from the architectural ordering of elements
18766 within a NEON register. The intrinsics defined in arm_neon.h use the
18767 NEON register element ordering, not the GCC RTL element ordering.
18769 For example, the in-memory ordering of a big-endian a quadword
18770 vector with 16-bit elements when stored from register pair {d0,d1}
18771 will be (lowest address first, d0[N] is NEON register element N):
18773 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18775 When necessary, quadword registers (dN, dN+1) are moved to ARM
18776 registers from rN in the order:
18778 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18780 So that STM/LDM can be used on vectors in ARM registers, and the
18781 same memory layout will result as if VSTM/VLDM were used.
18783 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18784 possible, which allows use of appropriate alignment tags.
18785 Note that the choice of "64" is independent of the actual vector
18786 element size; this size simply ensures that the behavior is
18787 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18789 Due to limitations of those instructions, use of VST1.64/VLD1.64
18790 is not possible if:
18791 - the address contains PRE_DEC, or
18792 - the mode refers to more than 4 double-word registers
18794 In those cases, it would be possible to replace VSTM/VLDM by a
18795 sequence of instructions; this is not currently implemented since
18796 this is not certain to actually improve performance. */
18798 const char *
18799 output_move_neon (rtx *operands)
18801 rtx reg, mem, addr, ops[2];
18802 int regno, nregs, load = REG_P (operands[0]);
18803 const char *templ;
18804 char buff[50];
18805 machine_mode mode;
18807 reg = operands[!load];
18808 mem = operands[load];
18810 mode = GET_MODE (reg);
18812 gcc_assert (REG_P (reg));
18813 regno = REGNO (reg);
18814 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18815 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18816 || NEON_REGNO_OK_FOR_QUAD (regno));
18817 gcc_assert (VALID_NEON_DREG_MODE (mode)
18818 || VALID_NEON_QREG_MODE (mode)
18819 || VALID_NEON_STRUCT_MODE (mode));
18820 gcc_assert (MEM_P (mem));
18822 addr = XEXP (mem, 0);
18824 /* Strip off const from addresses like (const (plus (...))). */
18825 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18826 addr = XEXP (addr, 0);
18828 switch (GET_CODE (addr))
18830 case POST_INC:
18831 /* We have to use vldm / vstm for too-large modes. */
18832 if (nregs > 4)
18834 templ = "v%smia%%?\t%%0!, %%h1";
18835 ops[0] = XEXP (addr, 0);
18837 else
18839 templ = "v%s1.64\t%%h1, %%A0";
18840 ops[0] = mem;
18842 ops[1] = reg;
18843 break;
18845 case PRE_DEC:
18846 /* We have to use vldm / vstm in this case, since there is no
18847 pre-decrement form of the vld1 / vst1 instructions. */
18848 templ = "v%smdb%%?\t%%0!, %%h1";
18849 ops[0] = XEXP (addr, 0);
18850 ops[1] = reg;
18851 break;
18853 case POST_MODIFY:
18854 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18855 gcc_unreachable ();
18857 case REG:
18858 /* We have to use vldm / vstm for too-large modes. */
18859 if (nregs > 1)
18861 if (nregs > 4)
18862 templ = "v%smia%%?\t%%m0, %%h1";
18863 else
18864 templ = "v%s1.64\t%%h1, %%A0";
18866 ops[0] = mem;
18867 ops[1] = reg;
18868 break;
18870 /* Fall through. */
18871 case LABEL_REF:
18872 case PLUS:
18874 int i;
18875 int overlap = -1;
18876 for (i = 0; i < nregs; i++)
18878 /* We're only using DImode here because it's a convenient size. */
18879 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18880 ops[1] = adjust_address (mem, DImode, 8 * i);
18881 if (reg_overlap_mentioned_p (ops[0], mem))
18883 gcc_assert (overlap == -1);
18884 overlap = i;
18886 else
18888 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18889 output_asm_insn (buff, ops);
18892 if (overlap != -1)
18894 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18895 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18896 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18897 output_asm_insn (buff, ops);
18900 return "";
18903 default:
18904 gcc_unreachable ();
18907 sprintf (buff, templ, load ? "ld" : "st");
18908 output_asm_insn (buff, ops);
18910 return "";
18913 /* Compute and return the length of neon_mov<mode>, where <mode> is
18914 one of VSTRUCT modes: EI, OI, CI or XI. */
18916 arm_attr_length_move_neon (rtx_insn *insn)
18918 rtx reg, mem, addr;
18919 int load;
18920 machine_mode mode;
18922 extract_insn_cached (insn);
18924 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18926 mode = GET_MODE (recog_data.operand[0]);
18927 switch (mode)
18929 case EImode:
18930 case OImode:
18931 return 8;
18932 case CImode:
18933 return 12;
18934 case XImode:
18935 return 16;
18936 default:
18937 gcc_unreachable ();
18941 load = REG_P (recog_data.operand[0]);
18942 reg = recog_data.operand[!load];
18943 mem = recog_data.operand[load];
18945 gcc_assert (MEM_P (mem));
18947 mode = GET_MODE (reg);
18948 addr = XEXP (mem, 0);
18950 /* Strip off const from addresses like (const (plus (...))). */
18951 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18952 addr = XEXP (addr, 0);
18954 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18956 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18957 return insns * 4;
18959 else
18960 return 4;
18963 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18964 return zero. */
18967 arm_address_offset_is_imm (rtx_insn *insn)
18969 rtx mem, addr;
18971 extract_insn_cached (insn);
18973 if (REG_P (recog_data.operand[0]))
18974 return 0;
18976 mem = recog_data.operand[0];
18978 gcc_assert (MEM_P (mem));
18980 addr = XEXP (mem, 0);
18982 if (REG_P (addr)
18983 || (GET_CODE (addr) == PLUS
18984 && REG_P (XEXP (addr, 0))
18985 && CONST_INT_P (XEXP (addr, 1))))
18986 return 1;
18987 else
18988 return 0;
18991 /* Output an ADD r, s, #n where n may be too big for one instruction.
18992 If adding zero to one register, output nothing. */
18993 const char *
18994 output_add_immediate (rtx *operands)
18996 HOST_WIDE_INT n = INTVAL (operands[2]);
18998 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19000 if (n < 0)
19001 output_multi_immediate (operands,
19002 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19003 -n);
19004 else
19005 output_multi_immediate (operands,
19006 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19010 return "";
19013 /* Output a multiple immediate operation.
19014 OPERANDS is the vector of operands referred to in the output patterns.
19015 INSTR1 is the output pattern to use for the first constant.
19016 INSTR2 is the output pattern to use for subsequent constants.
19017 IMMED_OP is the index of the constant slot in OPERANDS.
19018 N is the constant value. */
19019 static const char *
19020 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19021 int immed_op, HOST_WIDE_INT n)
19023 #if HOST_BITS_PER_WIDE_INT > 32
19024 n &= 0xffffffff;
19025 #endif
19027 if (n == 0)
19029 /* Quick and easy output. */
19030 operands[immed_op] = const0_rtx;
19031 output_asm_insn (instr1, operands);
19033 else
19035 int i;
19036 const char * instr = instr1;
19038 /* Note that n is never zero here (which would give no output). */
19039 for (i = 0; i < 32; i += 2)
19041 if (n & (3 << i))
19043 operands[immed_op] = GEN_INT (n & (255 << i));
19044 output_asm_insn (instr, operands);
19045 instr = instr2;
19046 i += 6;
19051 return "";
19054 /* Return the name of a shifter operation. */
19055 static const char *
19056 arm_shift_nmem(enum rtx_code code)
19058 switch (code)
19060 case ASHIFT:
19061 return ARM_LSL_NAME;
19063 case ASHIFTRT:
19064 return "asr";
19066 case LSHIFTRT:
19067 return "lsr";
19069 case ROTATERT:
19070 return "ror";
19072 default:
19073 abort();
19077 /* Return the appropriate ARM instruction for the operation code.
19078 The returned result should not be overwritten. OP is the rtx of the
19079 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19080 was shifted. */
19081 const char *
19082 arithmetic_instr (rtx op, int shift_first_arg)
19084 switch (GET_CODE (op))
19086 case PLUS:
19087 return "add";
19089 case MINUS:
19090 return shift_first_arg ? "rsb" : "sub";
19092 case IOR:
19093 return "orr";
19095 case XOR:
19096 return "eor";
19098 case AND:
19099 return "and";
19101 case ASHIFT:
19102 case ASHIFTRT:
19103 case LSHIFTRT:
19104 case ROTATERT:
19105 return arm_shift_nmem(GET_CODE(op));
19107 default:
19108 gcc_unreachable ();
19112 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19113 for the operation code. The returned result should not be overwritten.
19114 OP is the rtx code of the shift.
19115 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
19116 shift. */
19117 static const char *
19118 shift_op (rtx op, HOST_WIDE_INT *amountp)
19120 const char * mnem;
19121 enum rtx_code code = GET_CODE (op);
19123 switch (code)
19125 case ROTATE:
19126 if (!CONST_INT_P (XEXP (op, 1)))
19128 output_operand_lossage ("invalid shift operand");
19129 return NULL;
19132 code = ROTATERT;
19133 *amountp = 32 - INTVAL (XEXP (op, 1));
19134 mnem = "ror";
19135 break;
19137 case ASHIFT:
19138 case ASHIFTRT:
19139 case LSHIFTRT:
19140 case ROTATERT:
19141 mnem = arm_shift_nmem(code);
19142 if (CONST_INT_P (XEXP (op, 1)))
19144 *amountp = INTVAL (XEXP (op, 1));
19146 else if (REG_P (XEXP (op, 1)))
19148 *amountp = -1;
19149 return mnem;
19151 else
19153 output_operand_lossage ("invalid shift operand");
19154 return NULL;
19156 break;
19158 case MULT:
19159 /* We never have to worry about the amount being other than a
19160 power of 2, since this case can never be reloaded from a reg. */
19161 if (!CONST_INT_P (XEXP (op, 1)))
19163 output_operand_lossage ("invalid shift operand");
19164 return NULL;
19167 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19169 /* Amount must be a power of two. */
19170 if (*amountp & (*amountp - 1))
19172 output_operand_lossage ("invalid shift operand");
19173 return NULL;
19176 *amountp = exact_log2 (*amountp);
19177 gcc_assert (IN_RANGE (*amountp, 0, 31));
19178 return ARM_LSL_NAME;
19180 default:
19181 output_operand_lossage ("invalid shift operand");
19182 return NULL;
19185 /* This is not 100% correct, but follows from the desire to merge
19186 multiplication by a power of 2 with the recognizer for a
19187 shift. >=32 is not a valid shift for "lsl", so we must try and
19188 output a shift that produces the correct arithmetical result.
19189 Using lsr #32 is identical except for the fact that the carry bit
19190 is not set correctly if we set the flags; but we never use the
19191 carry bit from such an operation, so we can ignore that. */
19192 if (code == ROTATERT)
19193 /* Rotate is just modulo 32. */
19194 *amountp &= 31;
19195 else if (*amountp != (*amountp & 31))
19197 if (code == ASHIFT)
19198 mnem = "lsr";
19199 *amountp = 32;
19202 /* Shifts of 0 are no-ops. */
19203 if (*amountp == 0)
19204 return NULL;
19206 return mnem;
19209 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19210 because /bin/as is horribly restrictive. The judgement about
19211 whether or not each character is 'printable' (and can be output as
19212 is) or not (and must be printed with an octal escape) must be made
19213 with reference to the *host* character set -- the situation is
19214 similar to that discussed in the comments above pp_c_char in
19215 c-pretty-print.c. */
19217 #define MAX_ASCII_LEN 51
19219 void
19220 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19222 int i;
19223 int len_so_far = 0;
19225 fputs ("\t.ascii\t\"", stream);
19227 for (i = 0; i < len; i++)
19229 int c = p[i];
19231 if (len_so_far >= MAX_ASCII_LEN)
19233 fputs ("\"\n\t.ascii\t\"", stream);
19234 len_so_far = 0;
19237 if (ISPRINT (c))
19239 if (c == '\\' || c == '\"')
19241 putc ('\\', stream);
19242 len_so_far++;
19244 putc (c, stream);
19245 len_so_far++;
19247 else
19249 fprintf (stream, "\\%03o", c);
19250 len_so_far += 4;
19254 fputs ("\"\n", stream);
19257 /* Whether a register is callee saved or not. This is necessary because high
19258 registers are marked as caller saved when optimizing for size on Thumb-1
19259 targets despite being callee saved in order to avoid using them. */
19260 #define callee_saved_reg_p(reg) \
19261 (!call_used_regs[reg] \
19262 || (TARGET_THUMB1 && optimize_size \
19263 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19265 /* Compute the register save mask for registers 0 through 12
19266 inclusive. This code is used by arm_compute_save_reg_mask. */
19268 static unsigned long
19269 arm_compute_save_reg0_reg12_mask (void)
19271 unsigned long func_type = arm_current_func_type ();
19272 unsigned long save_reg_mask = 0;
19273 unsigned int reg;
19275 if (IS_INTERRUPT (func_type))
19277 unsigned int max_reg;
19278 /* Interrupt functions must not corrupt any registers,
19279 even call clobbered ones. If this is a leaf function
19280 we can just examine the registers used by the RTL, but
19281 otherwise we have to assume that whatever function is
19282 called might clobber anything, and so we have to save
19283 all the call-clobbered registers as well. */
19284 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19285 /* FIQ handlers have registers r8 - r12 banked, so
19286 we only need to check r0 - r7, Normal ISRs only
19287 bank r14 and r15, so we must check up to r12.
19288 r13 is the stack pointer which is always preserved,
19289 so we do not need to consider it here. */
19290 max_reg = 7;
19291 else
19292 max_reg = 12;
19294 for (reg = 0; reg <= max_reg; reg++)
19295 if (df_regs_ever_live_p (reg)
19296 || (! crtl->is_leaf && call_used_regs[reg]))
19297 save_reg_mask |= (1 << reg);
19299 /* Also save the pic base register if necessary. */
19300 if (flag_pic
19301 && !TARGET_SINGLE_PIC_BASE
19302 && arm_pic_register != INVALID_REGNUM
19303 && crtl->uses_pic_offset_table)
19304 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19306 else if (IS_VOLATILE(func_type))
19308 /* For noreturn functions we historically omitted register saves
19309 altogether. However this really messes up debugging. As a
19310 compromise save just the frame pointers. Combined with the link
19311 register saved elsewhere this should be sufficient to get
19312 a backtrace. */
19313 if (frame_pointer_needed)
19314 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19315 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19316 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19317 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19318 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19320 else
19322 /* In the normal case we only need to save those registers
19323 which are call saved and which are used by this function. */
19324 for (reg = 0; reg <= 11; reg++)
19325 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19326 save_reg_mask |= (1 << reg);
19328 /* Handle the frame pointer as a special case. */
19329 if (frame_pointer_needed)
19330 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19332 /* If we aren't loading the PIC register,
19333 don't stack it even though it may be live. */
19334 if (flag_pic
19335 && !TARGET_SINGLE_PIC_BASE
19336 && arm_pic_register != INVALID_REGNUM
19337 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19338 || crtl->uses_pic_offset_table))
19339 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19341 /* The prologue will copy SP into R0, so save it. */
19342 if (IS_STACKALIGN (func_type))
19343 save_reg_mask |= 1;
19346 /* Save registers so the exception handler can modify them. */
19347 if (crtl->calls_eh_return)
19349 unsigned int i;
19351 for (i = 0; ; i++)
19353 reg = EH_RETURN_DATA_REGNO (i);
19354 if (reg == INVALID_REGNUM)
19355 break;
19356 save_reg_mask |= 1 << reg;
19360 return save_reg_mask;
19363 /* Return true if r3 is live at the start of the function. */
19365 static bool
19366 arm_r3_live_at_start_p (void)
19368 /* Just look at cfg info, which is still close enough to correct at this
19369 point. This gives false positives for broken functions that might use
19370 uninitialized data that happens to be allocated in r3, but who cares? */
19371 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19374 /* Compute the number of bytes used to store the static chain register on the
19375 stack, above the stack frame. We need to know this accurately to get the
19376 alignment of the rest of the stack frame correct. */
19378 static int
19379 arm_compute_static_chain_stack_bytes (void)
19381 /* See the defining assertion in arm_expand_prologue. */
19382 if (IS_NESTED (arm_current_func_type ())
19383 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19384 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19385 && !df_regs_ever_live_p (LR_REGNUM)))
19386 && arm_r3_live_at_start_p ()
19387 && crtl->args.pretend_args_size == 0)
19388 return 4;
19390 return 0;
19393 /* Compute a bit mask of which registers need to be
19394 saved on the stack for the current function.
19395 This is used by arm_get_frame_offsets, which may add extra registers. */
19397 static unsigned long
19398 arm_compute_save_reg_mask (void)
19400 unsigned int save_reg_mask = 0;
19401 unsigned long func_type = arm_current_func_type ();
19402 unsigned int reg;
19404 if (IS_NAKED (func_type))
19405 /* This should never really happen. */
19406 return 0;
19408 /* If we are creating a stack frame, then we must save the frame pointer,
19409 IP (which will hold the old stack pointer), LR and the PC. */
19410 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19411 save_reg_mask |=
19412 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19413 | (1 << IP_REGNUM)
19414 | (1 << LR_REGNUM)
19415 | (1 << PC_REGNUM);
19417 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19419 /* Decide if we need to save the link register.
19420 Interrupt routines have their own banked link register,
19421 so they never need to save it.
19422 Otherwise if we do not use the link register we do not need to save
19423 it. If we are pushing other registers onto the stack however, we
19424 can save an instruction in the epilogue by pushing the link register
19425 now and then popping it back into the PC. This incurs extra memory
19426 accesses though, so we only do it when optimizing for size, and only
19427 if we know that we will not need a fancy return sequence. */
19428 if (df_regs_ever_live_p (LR_REGNUM)
19429 || (save_reg_mask
19430 && optimize_size
19431 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19432 && !crtl->tail_call_emit
19433 && !crtl->calls_eh_return))
19434 save_reg_mask |= 1 << LR_REGNUM;
19436 if (cfun->machine->lr_save_eliminated)
19437 save_reg_mask &= ~ (1 << LR_REGNUM);
19439 if (TARGET_REALLY_IWMMXT
19440 && ((bit_count (save_reg_mask)
19441 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19442 arm_compute_static_chain_stack_bytes())
19443 ) % 2) != 0)
19445 /* The total number of registers that are going to be pushed
19446 onto the stack is odd. We need to ensure that the stack
19447 is 64-bit aligned before we start to save iWMMXt registers,
19448 and also before we start to create locals. (A local variable
19449 might be a double or long long which we will load/store using
19450 an iWMMXt instruction). Therefore we need to push another
19451 ARM register, so that the stack will be 64-bit aligned. We
19452 try to avoid using the arg registers (r0 -r3) as they might be
19453 used to pass values in a tail call. */
19454 for (reg = 4; reg <= 12; reg++)
19455 if ((save_reg_mask & (1 << reg)) == 0)
19456 break;
19458 if (reg <= 12)
19459 save_reg_mask |= (1 << reg);
19460 else
19462 cfun->machine->sibcall_blocked = 1;
19463 save_reg_mask |= (1 << 3);
19467 /* We may need to push an additional register for use initializing the
19468 PIC base register. */
19469 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19470 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19472 reg = thumb_find_work_register (1 << 4);
19473 if (!call_used_regs[reg])
19474 save_reg_mask |= (1 << reg);
19477 return save_reg_mask;
19480 /* Compute a bit mask of which registers need to be
19481 saved on the stack for the current function. */
19482 static unsigned long
19483 thumb1_compute_save_reg_mask (void)
19485 unsigned long mask;
19486 unsigned reg;
19488 mask = 0;
19489 for (reg = 0; reg < 12; reg ++)
19490 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19491 mask |= 1 << reg;
19493 if (flag_pic
19494 && !TARGET_SINGLE_PIC_BASE
19495 && arm_pic_register != INVALID_REGNUM
19496 && crtl->uses_pic_offset_table)
19497 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19499 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19500 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19501 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19503 /* LR will also be pushed if any lo regs are pushed. */
19504 if (mask & 0xff || thumb_force_lr_save ())
19505 mask |= (1 << LR_REGNUM);
19507 /* Make sure we have a low work register if we need one.
19508 We will need one if we are going to push a high register,
19509 but we are not currently intending to push a low register. */
19510 if ((mask & 0xff) == 0
19511 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19513 /* Use thumb_find_work_register to choose which register
19514 we will use. If the register is live then we will
19515 have to push it. Use LAST_LO_REGNUM as our fallback
19516 choice for the register to select. */
19517 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19518 /* Make sure the register returned by thumb_find_work_register is
19519 not part of the return value. */
19520 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19521 reg = LAST_LO_REGNUM;
19523 if (callee_saved_reg_p (reg))
19524 mask |= 1 << reg;
19527 /* The 504 below is 8 bytes less than 512 because there are two possible
19528 alignment words. We can't tell here if they will be present or not so we
19529 have to play it safe and assume that they are. */
19530 if ((CALLER_INTERWORKING_SLOT_SIZE +
19531 ROUND_UP_WORD (get_frame_size ()) +
19532 crtl->outgoing_args_size) >= 504)
19534 /* This is the same as the code in thumb1_expand_prologue() which
19535 determines which register to use for stack decrement. */
19536 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19537 if (mask & (1 << reg))
19538 break;
19540 if (reg > LAST_LO_REGNUM)
19542 /* Make sure we have a register available for stack decrement. */
19543 mask |= 1 << LAST_LO_REGNUM;
19547 return mask;
19551 /* Return the number of bytes required to save VFP registers. */
19552 static int
19553 arm_get_vfp_saved_size (void)
19555 unsigned int regno;
19556 int count;
19557 int saved;
19559 saved = 0;
19560 /* Space for saved VFP registers. */
19561 if (TARGET_HARD_FLOAT)
19563 count = 0;
19564 for (regno = FIRST_VFP_REGNUM;
19565 regno < LAST_VFP_REGNUM;
19566 regno += 2)
19568 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19569 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19571 if (count > 0)
19573 /* Workaround ARM10 VFPr1 bug. */
19574 if (count == 2 && !arm_arch6)
19575 count++;
19576 saved += count * 8;
19578 count = 0;
19580 else
19581 count++;
19583 if (count > 0)
19585 if (count == 2 && !arm_arch6)
19586 count++;
19587 saved += count * 8;
19590 return saved;
19594 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19595 everything bar the final return instruction. If simple_return is true,
19596 then do not output epilogue, because it has already been emitted in RTL. */
19597 const char *
19598 output_return_instruction (rtx operand, bool really_return, bool reverse,
19599 bool simple_return)
19601 char conditional[10];
19602 char instr[100];
19603 unsigned reg;
19604 unsigned long live_regs_mask;
19605 unsigned long func_type;
19606 arm_stack_offsets *offsets;
19608 func_type = arm_current_func_type ();
19610 if (IS_NAKED (func_type))
19611 return "";
19613 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19615 /* If this function was declared non-returning, and we have
19616 found a tail call, then we have to trust that the called
19617 function won't return. */
19618 if (really_return)
19620 rtx ops[2];
19622 /* Otherwise, trap an attempted return by aborting. */
19623 ops[0] = operand;
19624 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19625 : "abort");
19626 assemble_external_libcall (ops[1]);
19627 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19630 return "";
19633 gcc_assert (!cfun->calls_alloca || really_return);
19635 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19637 cfun->machine->return_used_this_function = 1;
19639 offsets = arm_get_frame_offsets ();
19640 live_regs_mask = offsets->saved_regs_mask;
19642 if (!simple_return && live_regs_mask)
19644 const char * return_reg;
19646 /* If we do not have any special requirements for function exit
19647 (e.g. interworking) then we can load the return address
19648 directly into the PC. Otherwise we must load it into LR. */
19649 if (really_return
19650 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19651 return_reg = reg_names[PC_REGNUM];
19652 else
19653 return_reg = reg_names[LR_REGNUM];
19655 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19657 /* There are three possible reasons for the IP register
19658 being saved. 1) a stack frame was created, in which case
19659 IP contains the old stack pointer, or 2) an ISR routine
19660 corrupted it, or 3) it was saved to align the stack on
19661 iWMMXt. In case 1, restore IP into SP, otherwise just
19662 restore IP. */
19663 if (frame_pointer_needed)
19665 live_regs_mask &= ~ (1 << IP_REGNUM);
19666 live_regs_mask |= (1 << SP_REGNUM);
19668 else
19669 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19672 /* On some ARM architectures it is faster to use LDR rather than
19673 LDM to load a single register. On other architectures, the
19674 cost is the same. In 26 bit mode, or for exception handlers,
19675 we have to use LDM to load the PC so that the CPSR is also
19676 restored. */
19677 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19678 if (live_regs_mask == (1U << reg))
19679 break;
19681 if (reg <= LAST_ARM_REGNUM
19682 && (reg != LR_REGNUM
19683 || ! really_return
19684 || ! IS_INTERRUPT (func_type)))
19686 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19687 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19689 else
19691 char *p;
19692 int first = 1;
19694 /* Generate the load multiple instruction to restore the
19695 registers. Note we can get here, even if
19696 frame_pointer_needed is true, but only if sp already
19697 points to the base of the saved core registers. */
19698 if (live_regs_mask & (1 << SP_REGNUM))
19700 unsigned HOST_WIDE_INT stack_adjust;
19702 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19703 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19705 if (stack_adjust && arm_arch5 && TARGET_ARM)
19706 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19707 else
19709 /* If we can't use ldmib (SA110 bug),
19710 then try to pop r3 instead. */
19711 if (stack_adjust)
19712 live_regs_mask |= 1 << 3;
19714 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19717 /* For interrupt returns we have to use an LDM rather than
19718 a POP so that we can use the exception return variant. */
19719 else if (IS_INTERRUPT (func_type))
19720 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19721 else
19722 sprintf (instr, "pop%s\t{", conditional);
19724 p = instr + strlen (instr);
19726 for (reg = 0; reg <= SP_REGNUM; reg++)
19727 if (live_regs_mask & (1 << reg))
19729 int l = strlen (reg_names[reg]);
19731 if (first)
19732 first = 0;
19733 else
19735 memcpy (p, ", ", 2);
19736 p += 2;
19739 memcpy (p, "%|", 2);
19740 memcpy (p + 2, reg_names[reg], l);
19741 p += l + 2;
19744 if (live_regs_mask & (1 << LR_REGNUM))
19746 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19747 /* If returning from an interrupt, restore the CPSR. */
19748 if (IS_INTERRUPT (func_type))
19749 strcat (p, "^");
19751 else
19752 strcpy (p, "}");
19755 output_asm_insn (instr, & operand);
19757 /* See if we need to generate an extra instruction to
19758 perform the actual function return. */
19759 if (really_return
19760 && func_type != ARM_FT_INTERWORKED
19761 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19763 /* The return has already been handled
19764 by loading the LR into the PC. */
19765 return "";
19769 if (really_return)
19771 switch ((int) ARM_FUNC_TYPE (func_type))
19773 case ARM_FT_ISR:
19774 case ARM_FT_FIQ:
19775 /* ??? This is wrong for unified assembly syntax. */
19776 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19777 break;
19779 case ARM_FT_INTERWORKED:
19780 gcc_assert (arm_arch5 || arm_arch4t);
19781 sprintf (instr, "bx%s\t%%|lr", conditional);
19782 break;
19784 case ARM_FT_EXCEPTION:
19785 /* ??? This is wrong for unified assembly syntax. */
19786 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19787 break;
19789 default:
19790 /* Use bx if it's available. */
19791 if (arm_arch5 || arm_arch4t)
19792 sprintf (instr, "bx%s\t%%|lr", conditional);
19793 else
19794 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19795 break;
19798 output_asm_insn (instr, & operand);
19801 return "";
19804 /* Write the function name into the code section, directly preceding
19805 the function prologue.
19807 Code will be output similar to this:
19809 .ascii "arm_poke_function_name", 0
19810 .align
19812 .word 0xff000000 + (t1 - t0)
19813 arm_poke_function_name
19814 mov ip, sp
19815 stmfd sp!, {fp, ip, lr, pc}
19816 sub fp, ip, #4
19818 When performing a stack backtrace, code can inspect the value
19819 of 'pc' stored at 'fp' + 0. If the trace function then looks
19820 at location pc - 12 and the top 8 bits are set, then we know
19821 that there is a function name embedded immediately preceding this
19822 location and has length ((pc[-3]) & 0xff000000).
19824 We assume that pc is declared as a pointer to an unsigned long.
19826 It is of no benefit to output the function name if we are assembling
19827 a leaf function. These function types will not contain a stack
19828 backtrace structure, therefore it is not possible to determine the
19829 function name. */
19830 void
19831 arm_poke_function_name (FILE *stream, const char *name)
19833 unsigned long alignlength;
19834 unsigned long length;
19835 rtx x;
19837 length = strlen (name) + 1;
19838 alignlength = ROUND_UP_WORD (length);
19840 ASM_OUTPUT_ASCII (stream, name, length);
19841 ASM_OUTPUT_ALIGN (stream, 2);
19842 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19843 assemble_aligned_integer (UNITS_PER_WORD, x);
19846 /* Place some comments into the assembler stream
19847 describing the current function. */
19848 static void
19849 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19851 unsigned long func_type;
19853 /* ??? Do we want to print some of the below anyway? */
19854 if (TARGET_THUMB1)
19855 return;
19857 /* Sanity check. */
19858 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19860 func_type = arm_current_func_type ();
19862 switch ((int) ARM_FUNC_TYPE (func_type))
19864 default:
19865 case ARM_FT_NORMAL:
19866 break;
19867 case ARM_FT_INTERWORKED:
19868 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19869 break;
19870 case ARM_FT_ISR:
19871 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19872 break;
19873 case ARM_FT_FIQ:
19874 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19875 break;
19876 case ARM_FT_EXCEPTION:
19877 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19878 break;
19881 if (IS_NAKED (func_type))
19882 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19884 if (IS_VOLATILE (func_type))
19885 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19887 if (IS_NESTED (func_type))
19888 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19889 if (IS_STACKALIGN (func_type))
19890 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19892 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19893 crtl->args.size,
19894 crtl->args.pretend_args_size, frame_size);
19896 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19897 frame_pointer_needed,
19898 cfun->machine->uses_anonymous_args);
19900 if (cfun->machine->lr_save_eliminated)
19901 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19903 if (crtl->calls_eh_return)
19904 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19908 static void
19909 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19910 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19912 arm_stack_offsets *offsets;
19914 if (TARGET_THUMB1)
19916 int regno;
19918 /* Emit any call-via-reg trampolines that are needed for v4t support
19919 of call_reg and call_value_reg type insns. */
19920 for (regno = 0; regno < LR_REGNUM; regno++)
19922 rtx label = cfun->machine->call_via[regno];
19924 if (label != NULL)
19926 switch_to_section (function_section (current_function_decl));
19927 targetm.asm_out.internal_label (asm_out_file, "L",
19928 CODE_LABEL_NUMBER (label));
19929 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19933 /* ??? Probably not safe to set this here, since it assumes that a
19934 function will be emitted as assembly immediately after we generate
19935 RTL for it. This does not happen for inline functions. */
19936 cfun->machine->return_used_this_function = 0;
19938 else /* TARGET_32BIT */
19940 /* We need to take into account any stack-frame rounding. */
19941 offsets = arm_get_frame_offsets ();
19943 gcc_assert (!use_return_insn (FALSE, NULL)
19944 || (cfun->machine->return_used_this_function != 0)
19945 || offsets->saved_regs == offsets->outgoing_args
19946 || frame_pointer_needed);
19950 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19951 STR and STRD. If an even number of registers are being pushed, one
19952 or more STRD patterns are created for each register pair. If an
19953 odd number of registers are pushed, emit an initial STR followed by
19954 as many STRD instructions as are needed. This works best when the
19955 stack is initially 64-bit aligned (the normal case), since it
19956 ensures that each STRD is also 64-bit aligned. */
19957 static void
19958 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19960 int num_regs = 0;
19961 int i;
19962 int regno;
19963 rtx par = NULL_RTX;
19964 rtx dwarf = NULL_RTX;
19965 rtx tmp;
19966 bool first = true;
19968 num_regs = bit_count (saved_regs_mask);
19970 /* Must be at least one register to save, and can't save SP or PC. */
19971 gcc_assert (num_regs > 0 && num_regs <= 14);
19972 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19973 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19975 /* Create sequence for DWARF info. All the frame-related data for
19976 debugging is held in this wrapper. */
19977 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19979 /* Describe the stack adjustment. */
19980 tmp = gen_rtx_SET (stack_pointer_rtx,
19981 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19982 RTX_FRAME_RELATED_P (tmp) = 1;
19983 XVECEXP (dwarf, 0, 0) = tmp;
19985 /* Find the first register. */
19986 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19989 i = 0;
19991 /* If there's an odd number of registers to push. Start off by
19992 pushing a single register. This ensures that subsequent strd
19993 operations are dword aligned (assuming that SP was originally
19994 64-bit aligned). */
19995 if ((num_regs & 1) != 0)
19997 rtx reg, mem, insn;
19999 reg = gen_rtx_REG (SImode, regno);
20000 if (num_regs == 1)
20001 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20002 stack_pointer_rtx));
20003 else
20004 mem = gen_frame_mem (Pmode,
20005 gen_rtx_PRE_MODIFY
20006 (Pmode, stack_pointer_rtx,
20007 plus_constant (Pmode, stack_pointer_rtx,
20008 -4 * num_regs)));
20010 tmp = gen_rtx_SET (mem, reg);
20011 RTX_FRAME_RELATED_P (tmp) = 1;
20012 insn = emit_insn (tmp);
20013 RTX_FRAME_RELATED_P (insn) = 1;
20014 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20015 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20016 RTX_FRAME_RELATED_P (tmp) = 1;
20017 i++;
20018 regno++;
20019 XVECEXP (dwarf, 0, i) = tmp;
20020 first = false;
20023 while (i < num_regs)
20024 if (saved_regs_mask & (1 << regno))
20026 rtx reg1, reg2, mem1, mem2;
20027 rtx tmp0, tmp1, tmp2;
20028 int regno2;
20030 /* Find the register to pair with this one. */
20031 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20032 regno2++)
20035 reg1 = gen_rtx_REG (SImode, regno);
20036 reg2 = gen_rtx_REG (SImode, regno2);
20038 if (first)
20040 rtx insn;
20042 first = false;
20043 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20044 stack_pointer_rtx,
20045 -4 * num_regs));
20046 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20047 stack_pointer_rtx,
20048 -4 * (num_regs - 1)));
20049 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20050 plus_constant (Pmode, stack_pointer_rtx,
20051 -4 * (num_regs)));
20052 tmp1 = gen_rtx_SET (mem1, reg1);
20053 tmp2 = gen_rtx_SET (mem2, reg2);
20054 RTX_FRAME_RELATED_P (tmp0) = 1;
20055 RTX_FRAME_RELATED_P (tmp1) = 1;
20056 RTX_FRAME_RELATED_P (tmp2) = 1;
20057 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20058 XVECEXP (par, 0, 0) = tmp0;
20059 XVECEXP (par, 0, 1) = tmp1;
20060 XVECEXP (par, 0, 2) = tmp2;
20061 insn = emit_insn (par);
20062 RTX_FRAME_RELATED_P (insn) = 1;
20063 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20065 else
20067 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20068 stack_pointer_rtx,
20069 4 * i));
20070 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20071 stack_pointer_rtx,
20072 4 * (i + 1)));
20073 tmp1 = gen_rtx_SET (mem1, reg1);
20074 tmp2 = gen_rtx_SET (mem2, reg2);
20075 RTX_FRAME_RELATED_P (tmp1) = 1;
20076 RTX_FRAME_RELATED_P (tmp2) = 1;
20077 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20078 XVECEXP (par, 0, 0) = tmp1;
20079 XVECEXP (par, 0, 1) = tmp2;
20080 emit_insn (par);
20083 /* Create unwind information. This is an approximation. */
20084 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20085 plus_constant (Pmode,
20086 stack_pointer_rtx,
20087 4 * i)),
20088 reg1);
20089 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20090 plus_constant (Pmode,
20091 stack_pointer_rtx,
20092 4 * (i + 1))),
20093 reg2);
20095 RTX_FRAME_RELATED_P (tmp1) = 1;
20096 RTX_FRAME_RELATED_P (tmp2) = 1;
20097 XVECEXP (dwarf, 0, i + 1) = tmp1;
20098 XVECEXP (dwarf, 0, i + 2) = tmp2;
20099 i += 2;
20100 regno = regno2 + 1;
20102 else
20103 regno++;
20105 return;
20108 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20109 whenever possible, otherwise it emits single-word stores. The first store
20110 also allocates stack space for all saved registers, using writeback with
20111 post-addressing mode. All other stores use offset addressing. If no STRD
20112 can be emitted, this function emits a sequence of single-word stores,
20113 and not an STM as before, because single-word stores provide more freedom
20114 scheduling and can be turned into an STM by peephole optimizations. */
20115 static void
20116 arm_emit_strd_push (unsigned long saved_regs_mask)
20118 int num_regs = 0;
20119 int i, j, dwarf_index = 0;
20120 int offset = 0;
20121 rtx dwarf = NULL_RTX;
20122 rtx insn = NULL_RTX;
20123 rtx tmp, mem;
20125 /* TODO: A more efficient code can be emitted by changing the
20126 layout, e.g., first push all pairs that can use STRD to keep the
20127 stack aligned, and then push all other registers. */
20128 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20129 if (saved_regs_mask & (1 << i))
20130 num_regs++;
20132 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20133 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20134 gcc_assert (num_regs > 0);
20136 /* Create sequence for DWARF info. */
20137 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20139 /* For dwarf info, we generate explicit stack update. */
20140 tmp = gen_rtx_SET (stack_pointer_rtx,
20141 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20142 RTX_FRAME_RELATED_P (tmp) = 1;
20143 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20145 /* Save registers. */
20146 offset = - 4 * num_regs;
20147 j = 0;
20148 while (j <= LAST_ARM_REGNUM)
20149 if (saved_regs_mask & (1 << j))
20151 if ((j % 2 == 0)
20152 && (saved_regs_mask & (1 << (j + 1))))
20154 /* Current register and previous register form register pair for
20155 which STRD can be generated. */
20156 if (offset < 0)
20158 /* Allocate stack space for all saved registers. */
20159 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20160 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20161 mem = gen_frame_mem (DImode, tmp);
20162 offset = 0;
20164 else if (offset > 0)
20165 mem = gen_frame_mem (DImode,
20166 plus_constant (Pmode,
20167 stack_pointer_rtx,
20168 offset));
20169 else
20170 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20172 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20173 RTX_FRAME_RELATED_P (tmp) = 1;
20174 tmp = emit_insn (tmp);
20176 /* Record the first store insn. */
20177 if (dwarf_index == 1)
20178 insn = tmp;
20180 /* Generate dwarf info. */
20181 mem = gen_frame_mem (SImode,
20182 plus_constant (Pmode,
20183 stack_pointer_rtx,
20184 offset));
20185 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20186 RTX_FRAME_RELATED_P (tmp) = 1;
20187 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20189 mem = gen_frame_mem (SImode,
20190 plus_constant (Pmode,
20191 stack_pointer_rtx,
20192 offset + 4));
20193 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20194 RTX_FRAME_RELATED_P (tmp) = 1;
20195 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20197 offset += 8;
20198 j += 2;
20200 else
20202 /* Emit a single word store. */
20203 if (offset < 0)
20205 /* Allocate stack space for all saved registers. */
20206 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20207 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20208 mem = gen_frame_mem (SImode, tmp);
20209 offset = 0;
20211 else if (offset > 0)
20212 mem = gen_frame_mem (SImode,
20213 plus_constant (Pmode,
20214 stack_pointer_rtx,
20215 offset));
20216 else
20217 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20219 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20220 RTX_FRAME_RELATED_P (tmp) = 1;
20221 tmp = emit_insn (tmp);
20223 /* Record the first store insn. */
20224 if (dwarf_index == 1)
20225 insn = tmp;
20227 /* Generate dwarf info. */
20228 mem = gen_frame_mem (SImode,
20229 plus_constant(Pmode,
20230 stack_pointer_rtx,
20231 offset));
20232 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20233 RTX_FRAME_RELATED_P (tmp) = 1;
20234 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20236 offset += 4;
20237 j += 1;
20240 else
20241 j++;
20243 /* Attach dwarf info to the first insn we generate. */
20244 gcc_assert (insn != NULL_RTX);
20245 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20246 RTX_FRAME_RELATED_P (insn) = 1;
20249 /* Generate and emit an insn that we will recognize as a push_multi.
20250 Unfortunately, since this insn does not reflect very well the actual
20251 semantics of the operation, we need to annotate the insn for the benefit
20252 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20253 MASK for registers that should be annotated for DWARF2 frame unwind
20254 information. */
20255 static rtx
20256 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20258 int num_regs = 0;
20259 int num_dwarf_regs = 0;
20260 int i, j;
20261 rtx par;
20262 rtx dwarf;
20263 int dwarf_par_index;
20264 rtx tmp, reg;
20266 /* We don't record the PC in the dwarf frame information. */
20267 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20269 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20271 if (mask & (1 << i))
20272 num_regs++;
20273 if (dwarf_regs_mask & (1 << i))
20274 num_dwarf_regs++;
20277 gcc_assert (num_regs && num_regs <= 16);
20278 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20280 /* For the body of the insn we are going to generate an UNSPEC in
20281 parallel with several USEs. This allows the insn to be recognized
20282 by the push_multi pattern in the arm.md file.
20284 The body of the insn looks something like this:
20286 (parallel [
20287 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20288 (const_int:SI <num>)))
20289 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20290 (use (reg:SI XX))
20291 (use (reg:SI YY))
20295 For the frame note however, we try to be more explicit and actually
20296 show each register being stored into the stack frame, plus a (single)
20297 decrement of the stack pointer. We do it this way in order to be
20298 friendly to the stack unwinding code, which only wants to see a single
20299 stack decrement per instruction. The RTL we generate for the note looks
20300 something like this:
20302 (sequence [
20303 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20304 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20305 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20306 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20310 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20311 instead we'd have a parallel expression detailing all
20312 the stores to the various memory addresses so that debug
20313 information is more up-to-date. Remember however while writing
20314 this to take care of the constraints with the push instruction.
20316 Note also that this has to be taken care of for the VFP registers.
20318 For more see PR43399. */
20320 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20321 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20322 dwarf_par_index = 1;
20324 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20326 if (mask & (1 << i))
20328 reg = gen_rtx_REG (SImode, i);
20330 XVECEXP (par, 0, 0)
20331 = gen_rtx_SET (gen_frame_mem
20332 (BLKmode,
20333 gen_rtx_PRE_MODIFY (Pmode,
20334 stack_pointer_rtx,
20335 plus_constant
20336 (Pmode, stack_pointer_rtx,
20337 -4 * num_regs))
20339 gen_rtx_UNSPEC (BLKmode,
20340 gen_rtvec (1, reg),
20341 UNSPEC_PUSH_MULT));
20343 if (dwarf_regs_mask & (1 << i))
20345 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20346 reg);
20347 RTX_FRAME_RELATED_P (tmp) = 1;
20348 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20351 break;
20355 for (j = 1, i++; j < num_regs; i++)
20357 if (mask & (1 << i))
20359 reg = gen_rtx_REG (SImode, i);
20361 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20363 if (dwarf_regs_mask & (1 << i))
20366 = gen_rtx_SET (gen_frame_mem
20367 (SImode,
20368 plus_constant (Pmode, stack_pointer_rtx,
20369 4 * j)),
20370 reg);
20371 RTX_FRAME_RELATED_P (tmp) = 1;
20372 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20375 j++;
20379 par = emit_insn (par);
20381 tmp = gen_rtx_SET (stack_pointer_rtx,
20382 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20383 RTX_FRAME_RELATED_P (tmp) = 1;
20384 XVECEXP (dwarf, 0, 0) = tmp;
20386 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20388 return par;
20391 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20392 SIZE is the offset to be adjusted.
20393 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20394 static void
20395 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20397 rtx dwarf;
20399 RTX_FRAME_RELATED_P (insn) = 1;
20400 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20401 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20404 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20405 SAVED_REGS_MASK shows which registers need to be restored.
20407 Unfortunately, since this insn does not reflect very well the actual
20408 semantics of the operation, we need to annotate the insn for the benefit
20409 of DWARF2 frame unwind information. */
20410 static void
20411 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20413 int num_regs = 0;
20414 int i, j;
20415 rtx par;
20416 rtx dwarf = NULL_RTX;
20417 rtx tmp, reg;
20418 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20419 int offset_adj;
20420 int emit_update;
20422 offset_adj = return_in_pc ? 1 : 0;
20423 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20424 if (saved_regs_mask & (1 << i))
20425 num_regs++;
20427 gcc_assert (num_regs && num_regs <= 16);
20429 /* If SP is in reglist, then we don't emit SP update insn. */
20430 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20432 /* The parallel needs to hold num_regs SETs
20433 and one SET for the stack update. */
20434 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20436 if (return_in_pc)
20437 XVECEXP (par, 0, 0) = ret_rtx;
20439 if (emit_update)
20441 /* Increment the stack pointer, based on there being
20442 num_regs 4-byte registers to restore. */
20443 tmp = gen_rtx_SET (stack_pointer_rtx,
20444 plus_constant (Pmode,
20445 stack_pointer_rtx,
20446 4 * num_regs));
20447 RTX_FRAME_RELATED_P (tmp) = 1;
20448 XVECEXP (par, 0, offset_adj) = tmp;
20451 /* Now restore every reg, which may include PC. */
20452 for (j = 0, i = 0; j < num_regs; i++)
20453 if (saved_regs_mask & (1 << i))
20455 reg = gen_rtx_REG (SImode, i);
20456 if ((num_regs == 1) && emit_update && !return_in_pc)
20458 /* Emit single load with writeback. */
20459 tmp = gen_frame_mem (SImode,
20460 gen_rtx_POST_INC (Pmode,
20461 stack_pointer_rtx));
20462 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20463 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20464 return;
20467 tmp = gen_rtx_SET (reg,
20468 gen_frame_mem
20469 (SImode,
20470 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20471 RTX_FRAME_RELATED_P (tmp) = 1;
20472 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20474 /* We need to maintain a sequence for DWARF info too. As dwarf info
20475 should not have PC, skip PC. */
20476 if (i != PC_REGNUM)
20477 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20479 j++;
20482 if (return_in_pc)
20483 par = emit_jump_insn (par);
20484 else
20485 par = emit_insn (par);
20487 REG_NOTES (par) = dwarf;
20488 if (!return_in_pc)
20489 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20490 stack_pointer_rtx, stack_pointer_rtx);
20493 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20494 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20496 Unfortunately, since this insn does not reflect very well the actual
20497 semantics of the operation, we need to annotate the insn for the benefit
20498 of DWARF2 frame unwind information. */
20499 static void
20500 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20502 int i, j;
20503 rtx par;
20504 rtx dwarf = NULL_RTX;
20505 rtx tmp, reg;
20507 gcc_assert (num_regs && num_regs <= 32);
20509 /* Workaround ARM10 VFPr1 bug. */
20510 if (num_regs == 2 && !arm_arch6)
20512 if (first_reg == 15)
20513 first_reg--;
20515 num_regs++;
20518 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20519 there could be up to 32 D-registers to restore.
20520 If there are more than 16 D-registers, make two recursive calls,
20521 each of which emits one pop_multi instruction. */
20522 if (num_regs > 16)
20524 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20525 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20526 return;
20529 /* The parallel needs to hold num_regs SETs
20530 and one SET for the stack update. */
20531 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20533 /* Increment the stack pointer, based on there being
20534 num_regs 8-byte registers to restore. */
20535 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20536 RTX_FRAME_RELATED_P (tmp) = 1;
20537 XVECEXP (par, 0, 0) = tmp;
20539 /* Now show every reg that will be restored, using a SET for each. */
20540 for (j = 0, i=first_reg; j < num_regs; i += 2)
20542 reg = gen_rtx_REG (DFmode, i);
20544 tmp = gen_rtx_SET (reg,
20545 gen_frame_mem
20546 (DFmode,
20547 plus_constant (Pmode, base_reg, 8 * j)));
20548 RTX_FRAME_RELATED_P (tmp) = 1;
20549 XVECEXP (par, 0, j + 1) = tmp;
20551 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20553 j++;
20556 par = emit_insn (par);
20557 REG_NOTES (par) = dwarf;
20559 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */
20560 if (REGNO (base_reg) == IP_REGNUM)
20562 RTX_FRAME_RELATED_P (par) = 1;
20563 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20565 else
20566 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20567 base_reg, base_reg);
20570 /* Generate and emit a pattern that will be recognized as LDRD pattern. If even
20571 number of registers are being popped, multiple LDRD patterns are created for
20572 all register pairs. If odd number of registers are popped, last register is
20573 loaded by using LDR pattern. */
20574 static void
20575 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20577 int num_regs = 0;
20578 int i, j;
20579 rtx par = NULL_RTX;
20580 rtx dwarf = NULL_RTX;
20581 rtx tmp, reg, tmp1;
20582 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20584 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20585 if (saved_regs_mask & (1 << i))
20586 num_regs++;
20588 gcc_assert (num_regs && num_regs <= 16);
20590 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20591 to be popped. So, if num_regs is even, now it will become odd,
20592 and we can generate pop with PC. If num_regs is odd, it will be
20593 even now, and ldr with return can be generated for PC. */
20594 if (return_in_pc)
20595 num_regs--;
20597 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20599 /* Var j iterates over all the registers to gather all the registers in
20600 saved_regs_mask. Var i gives index of saved registers in stack frame.
20601 A PARALLEL RTX of register-pair is created here, so that pattern for
20602 LDRD can be matched. As PC is always last register to be popped, and
20603 we have already decremented num_regs if PC, we don't have to worry
20604 about PC in this loop. */
20605 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20606 if (saved_regs_mask & (1 << j))
20608 /* Create RTX for memory load. */
20609 reg = gen_rtx_REG (SImode, j);
20610 tmp = gen_rtx_SET (reg,
20611 gen_frame_mem (SImode,
20612 plus_constant (Pmode,
20613 stack_pointer_rtx, 4 * i)));
20614 RTX_FRAME_RELATED_P (tmp) = 1;
20616 if (i % 2 == 0)
20618 /* When saved-register index (i) is even, the RTX to be emitted is
20619 yet to be created. Hence create it first. The LDRD pattern we
20620 are generating is :
20621 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20622 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20623 where target registers need not be consecutive. */
20624 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20625 dwarf = NULL_RTX;
20628 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20629 added as 0th element and if i is odd, reg_i is added as 1st element
20630 of LDRD pattern shown above. */
20631 XVECEXP (par, 0, (i % 2)) = tmp;
20632 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20634 if ((i % 2) == 1)
20636 /* When saved-register index (i) is odd, RTXs for both the registers
20637 to be loaded are generated in above given LDRD pattern, and the
20638 pattern can be emitted now. */
20639 par = emit_insn (par);
20640 REG_NOTES (par) = dwarf;
20641 RTX_FRAME_RELATED_P (par) = 1;
20644 i++;
20647 /* If the number of registers pushed is odd AND return_in_pc is false OR
20648 number of registers are even AND return_in_pc is true, last register is
20649 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20650 then LDR with post increment. */
20652 /* Increment the stack pointer, based on there being
20653 num_regs 4-byte registers to restore. */
20654 tmp = gen_rtx_SET (stack_pointer_rtx,
20655 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20656 RTX_FRAME_RELATED_P (tmp) = 1;
20657 tmp = emit_insn (tmp);
20658 if (!return_in_pc)
20660 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20661 stack_pointer_rtx, stack_pointer_rtx);
20664 dwarf = NULL_RTX;
20666 if (((num_regs % 2) == 1 && !return_in_pc)
20667 || ((num_regs % 2) == 0 && return_in_pc))
20669 /* Scan for the single register to be popped. Skip until the saved
20670 register is found. */
20671 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20673 /* Gen LDR with post increment here. */
20674 tmp1 = gen_rtx_MEM (SImode,
20675 gen_rtx_POST_INC (SImode,
20676 stack_pointer_rtx));
20677 set_mem_alias_set (tmp1, get_frame_alias_set ());
20679 reg = gen_rtx_REG (SImode, j);
20680 tmp = gen_rtx_SET (reg, tmp1);
20681 RTX_FRAME_RELATED_P (tmp) = 1;
20682 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20684 if (return_in_pc)
20686 /* If return_in_pc, j must be PC_REGNUM. */
20687 gcc_assert (j == PC_REGNUM);
20688 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20689 XVECEXP (par, 0, 0) = ret_rtx;
20690 XVECEXP (par, 0, 1) = tmp;
20691 par = emit_jump_insn (par);
20693 else
20695 par = emit_insn (tmp);
20696 REG_NOTES (par) = dwarf;
20697 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20698 stack_pointer_rtx, stack_pointer_rtx);
20702 else if ((num_regs % 2) == 1 && return_in_pc)
20704 /* There are 2 registers to be popped. So, generate the pattern
20705 pop_multiple_with_stack_update_and_return to pop in PC. */
20706 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20709 return;
20712 /* LDRD in ARM mode needs consecutive registers as operands. This function
20713 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20714 offset addressing and then generates one separate stack udpate. This provides
20715 more scheduling freedom, compared to writeback on every load. However,
20716 if the function returns using load into PC directly
20717 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20718 before the last load. TODO: Add a peephole optimization to recognize
20719 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20720 peephole optimization to merge the load at stack-offset zero
20721 with the stack update instruction using load with writeback
20722 in post-index addressing mode. */
20723 static void
20724 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20726 int j = 0;
20727 int offset = 0;
20728 rtx par = NULL_RTX;
20729 rtx dwarf = NULL_RTX;
20730 rtx tmp, mem;
20732 /* Restore saved registers. */
20733 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20734 j = 0;
20735 while (j <= LAST_ARM_REGNUM)
20736 if (saved_regs_mask & (1 << j))
20738 if ((j % 2) == 0
20739 && (saved_regs_mask & (1 << (j + 1)))
20740 && (j + 1) != PC_REGNUM)
20742 /* Current register and next register form register pair for which
20743 LDRD can be generated. PC is always the last register popped, and
20744 we handle it separately. */
20745 if (offset > 0)
20746 mem = gen_frame_mem (DImode,
20747 plus_constant (Pmode,
20748 stack_pointer_rtx,
20749 offset));
20750 else
20751 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20753 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20754 tmp = emit_insn (tmp);
20755 RTX_FRAME_RELATED_P (tmp) = 1;
20757 /* Generate dwarf info. */
20759 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20760 gen_rtx_REG (SImode, j),
20761 NULL_RTX);
20762 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20763 gen_rtx_REG (SImode, j + 1),
20764 dwarf);
20766 REG_NOTES (tmp) = dwarf;
20768 offset += 8;
20769 j += 2;
20771 else if (j != PC_REGNUM)
20773 /* Emit a single word load. */
20774 if (offset > 0)
20775 mem = gen_frame_mem (SImode,
20776 plus_constant (Pmode,
20777 stack_pointer_rtx,
20778 offset));
20779 else
20780 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20782 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20783 tmp = emit_insn (tmp);
20784 RTX_FRAME_RELATED_P (tmp) = 1;
20786 /* Generate dwarf info. */
20787 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20788 gen_rtx_REG (SImode, j),
20789 NULL_RTX);
20791 offset += 4;
20792 j += 1;
20794 else /* j == PC_REGNUM */
20795 j++;
20797 else
20798 j++;
20800 /* Update the stack. */
20801 if (offset > 0)
20803 tmp = gen_rtx_SET (stack_pointer_rtx,
20804 plus_constant (Pmode,
20805 stack_pointer_rtx,
20806 offset));
20807 tmp = emit_insn (tmp);
20808 arm_add_cfa_adjust_cfa_note (tmp, offset,
20809 stack_pointer_rtx, stack_pointer_rtx);
20810 offset = 0;
20813 if (saved_regs_mask & (1 << PC_REGNUM))
20815 /* Only PC is to be popped. */
20816 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20817 XVECEXP (par, 0, 0) = ret_rtx;
20818 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20819 gen_frame_mem (SImode,
20820 gen_rtx_POST_INC (SImode,
20821 stack_pointer_rtx)));
20822 RTX_FRAME_RELATED_P (tmp) = 1;
20823 XVECEXP (par, 0, 1) = tmp;
20824 par = emit_jump_insn (par);
20826 /* Generate dwarf info. */
20827 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20828 gen_rtx_REG (SImode, PC_REGNUM),
20829 NULL_RTX);
20830 REG_NOTES (par) = dwarf;
20831 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20832 stack_pointer_rtx, stack_pointer_rtx);
20836 /* Calculate the size of the return value that is passed in registers. */
20837 static unsigned
20838 arm_size_return_regs (void)
20840 machine_mode mode;
20842 if (crtl->return_rtx != 0)
20843 mode = GET_MODE (crtl->return_rtx);
20844 else
20845 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20847 return GET_MODE_SIZE (mode);
20850 /* Return true if the current function needs to save/restore LR. */
20851 static bool
20852 thumb_force_lr_save (void)
20854 return !cfun->machine->lr_save_eliminated
20855 && (!leaf_function_p ()
20856 || thumb_far_jump_used_p ()
20857 || df_regs_ever_live_p (LR_REGNUM));
20860 /* We do not know if r3 will be available because
20861 we do have an indirect tailcall happening in this
20862 particular case. */
20863 static bool
20864 is_indirect_tailcall_p (rtx call)
20866 rtx pat = PATTERN (call);
20868 /* Indirect tail call. */
20869 pat = XVECEXP (pat, 0, 0);
20870 if (GET_CODE (pat) == SET)
20871 pat = SET_SRC (pat);
20873 pat = XEXP (XEXP (pat, 0), 0);
20874 return REG_P (pat);
20877 /* Return true if r3 is used by any of the tail call insns in the
20878 current function. */
20879 static bool
20880 any_sibcall_could_use_r3 (void)
20882 edge_iterator ei;
20883 edge e;
20885 if (!crtl->tail_call_emit)
20886 return false;
20887 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20888 if (e->flags & EDGE_SIBCALL)
20890 rtx_insn *call = BB_END (e->src);
20891 if (!CALL_P (call))
20892 call = prev_nonnote_nondebug_insn (call);
20893 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20894 if (find_regno_fusage (call, USE, 3)
20895 || is_indirect_tailcall_p (call))
20896 return true;
20898 return false;
20902 /* Compute the distance from register FROM to register TO.
20903 These can be the arg pointer (26), the soft frame pointer (25),
20904 the stack pointer (13) or the hard frame pointer (11).
20905 In thumb mode r7 is used as the soft frame pointer, if needed.
20906 Typical stack layout looks like this:
20908 old stack pointer -> | |
20909 ----
20910 | | \
20911 | | saved arguments for
20912 | | vararg functions
20913 | | /
20915 hard FP & arg pointer -> | | \
20916 | | stack
20917 | | frame
20918 | | /
20920 | | \
20921 | | call saved
20922 | | registers
20923 soft frame pointer -> | | /
20925 | | \
20926 | | local
20927 | | variables
20928 locals base pointer -> | | /
20930 | | \
20931 | | outgoing
20932 | | arguments
20933 current stack pointer -> | | /
20936 For a given function some or all of these stack components
20937 may not be needed, giving rise to the possibility of
20938 eliminating some of the registers.
20940 The values returned by this function must reflect the behavior
20941 of arm_expand_prologue() and arm_compute_save_reg_mask().
20943 The sign of the number returned reflects the direction of stack
20944 growth, so the values are positive for all eliminations except
20945 from the soft frame pointer to the hard frame pointer.
20947 SFP may point just inside the local variables block to ensure correct
20948 alignment. */
20951 /* Calculate stack offsets. These are used to calculate register elimination
20952 offsets and in prologue/epilogue code. Also calculates which registers
20953 should be saved. */
20955 static arm_stack_offsets *
20956 arm_get_frame_offsets (void)
20958 struct arm_stack_offsets *offsets;
20959 unsigned long func_type;
20960 int leaf;
20961 int saved;
20962 int core_saved;
20963 HOST_WIDE_INT frame_size;
20964 int i;
20966 offsets = &cfun->machine->stack_offsets;
20968 /* We need to know if we are a leaf function. Unfortunately, it
20969 is possible to be called after start_sequence has been called,
20970 which causes get_insns to return the insns for the sequence,
20971 not the function, which will cause leaf_function_p to return
20972 the incorrect result.
20974 to know about leaf functions once reload has completed, and the
20975 frame size cannot be changed after that time, so we can safely
20976 use the cached value. */
20978 if (reload_completed)
20979 return offsets;
20981 /* Initially this is the size of the local variables. It will translated
20982 into an offset once we have determined the size of preceding data. */
20983 frame_size = ROUND_UP_WORD (get_frame_size ());
20985 leaf = leaf_function_p ();
20987 /* Space for variadic functions. */
20988 offsets->saved_args = crtl->args.pretend_args_size;
20990 /* In Thumb mode this is incorrect, but never used. */
20991 offsets->frame
20992 = (offsets->saved_args
20993 + arm_compute_static_chain_stack_bytes ()
20994 + (frame_pointer_needed ? 4 : 0));
20996 if (TARGET_32BIT)
20998 unsigned int regno;
21000 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
21001 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21002 saved = core_saved;
21004 /* We know that SP will be doubleword aligned on entry, and we must
21005 preserve that condition at any subroutine call. We also require the
21006 soft frame pointer to be doubleword aligned. */
21008 if (TARGET_REALLY_IWMMXT)
21010 /* Check for the call-saved iWMMXt registers. */
21011 for (regno = FIRST_IWMMXT_REGNUM;
21012 regno <= LAST_IWMMXT_REGNUM;
21013 regno++)
21014 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21015 saved += 8;
21018 func_type = arm_current_func_type ();
21019 /* Space for saved VFP registers. */
21020 if (! IS_VOLATILE (func_type)
21021 && TARGET_HARD_FLOAT)
21022 saved += arm_get_vfp_saved_size ();
21024 else /* TARGET_THUMB1 */
21026 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
21027 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21028 saved = core_saved;
21029 if (TARGET_BACKTRACE)
21030 saved += 16;
21033 /* Saved registers include the stack frame. */
21034 offsets->saved_regs
21035 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21036 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21038 /* A leaf function does not need any stack alignment if it has nothing
21039 on the stack. */
21040 if (leaf && frame_size == 0
21041 /* However if it calls alloca(), we have a dynamically allocated
21042 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21043 && ! cfun->calls_alloca)
21045 offsets->outgoing_args = offsets->soft_frame;
21046 offsets->locals_base = offsets->soft_frame;
21047 return offsets;
21050 /* Ensure SFP has the correct alignment. */
21051 if (ARM_DOUBLEWORD_ALIGN
21052 && (offsets->soft_frame & 7))
21054 offsets->soft_frame += 4;
21055 /* Try to align stack by pushing an extra reg. Don't bother doing this
21056 when there is a stack frame as the alignment will be rolled into
21057 the normal stack adjustment. */
21058 if (frame_size + crtl->outgoing_args_size == 0)
21060 int reg = -1;
21062 /* Register r3 is caller-saved. Normally it does not need to be
21063 saved on entry by the prologue. However if we choose to save
21064 it for padding then we may confuse the compiler into thinking
21065 a prologue sequence is required when in fact it is not. This
21066 will occur when shrink-wrapping if r3 is used as a scratch
21067 register and there are no other callee-saved writes.
21069 This situation can be avoided when other callee-saved registers
21070 are available and r3 is not mandatory if we choose a callee-saved
21071 register for padding. */
21072 bool prefer_callee_reg_p = false;
21074 /* If it is safe to use r3, then do so. This sometimes
21075 generates better code on Thumb-2 by avoiding the need to
21076 use 32-bit push/pop instructions. */
21077 if (! any_sibcall_could_use_r3 ()
21078 && arm_size_return_regs () <= 12
21079 && (offsets->saved_regs_mask & (1 << 3)) == 0
21080 && (TARGET_THUMB2
21081 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21083 reg = 3;
21084 if (!TARGET_THUMB2)
21085 prefer_callee_reg_p = true;
21087 if (reg == -1
21088 || prefer_callee_reg_p)
21090 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21092 /* Avoid fixed registers; they may be changed at
21093 arbitrary times so it's unsafe to restore them
21094 during the epilogue. */
21095 if (!fixed_regs[i]
21096 && (offsets->saved_regs_mask & (1 << i)) == 0)
21098 reg = i;
21099 break;
21104 if (reg != -1)
21106 offsets->saved_regs += 4;
21107 offsets->saved_regs_mask |= (1 << reg);
21112 offsets->locals_base = offsets->soft_frame + frame_size;
21113 offsets->outgoing_args = (offsets->locals_base
21114 + crtl->outgoing_args_size);
21116 if (ARM_DOUBLEWORD_ALIGN)
21118 /* Ensure SP remains doubleword aligned. */
21119 if (offsets->outgoing_args & 7)
21120 offsets->outgoing_args += 4;
21121 gcc_assert (!(offsets->outgoing_args & 7));
21124 return offsets;
21128 /* Calculate the relative offsets for the different stack pointers. Positive
21129 offsets are in the direction of stack growth. */
21131 HOST_WIDE_INT
21132 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21134 arm_stack_offsets *offsets;
21136 offsets = arm_get_frame_offsets ();
21138 /* OK, now we have enough information to compute the distances.
21139 There must be an entry in these switch tables for each pair
21140 of registers in ELIMINABLE_REGS, even if some of the entries
21141 seem to be redundant or useless. */
21142 switch (from)
21144 case ARG_POINTER_REGNUM:
21145 switch (to)
21147 case THUMB_HARD_FRAME_POINTER_REGNUM:
21148 return 0;
21150 case FRAME_POINTER_REGNUM:
21151 /* This is the reverse of the soft frame pointer
21152 to hard frame pointer elimination below. */
21153 return offsets->soft_frame - offsets->saved_args;
21155 case ARM_HARD_FRAME_POINTER_REGNUM:
21156 /* This is only non-zero in the case where the static chain register
21157 is stored above the frame. */
21158 return offsets->frame - offsets->saved_args - 4;
21160 case STACK_POINTER_REGNUM:
21161 /* If nothing has been pushed on the stack at all
21162 then this will return -4. This *is* correct! */
21163 return offsets->outgoing_args - (offsets->saved_args + 4);
21165 default:
21166 gcc_unreachable ();
21168 gcc_unreachable ();
21170 case FRAME_POINTER_REGNUM:
21171 switch (to)
21173 case THUMB_HARD_FRAME_POINTER_REGNUM:
21174 return 0;
21176 case ARM_HARD_FRAME_POINTER_REGNUM:
21177 /* The hard frame pointer points to the top entry in the
21178 stack frame. The soft frame pointer to the bottom entry
21179 in the stack frame. If there is no stack frame at all,
21180 then they are identical. */
21182 return offsets->frame - offsets->soft_frame;
21184 case STACK_POINTER_REGNUM:
21185 return offsets->outgoing_args - offsets->soft_frame;
21187 default:
21188 gcc_unreachable ();
21190 gcc_unreachable ();
21192 default:
21193 /* You cannot eliminate from the stack pointer.
21194 In theory you could eliminate from the hard frame
21195 pointer to the stack pointer, but this will never
21196 happen, since if a stack frame is not needed the
21197 hard frame pointer will never be used. */
21198 gcc_unreachable ();
21202 /* Given FROM and TO register numbers, say whether this elimination is
21203 allowed. Frame pointer elimination is automatically handled.
21205 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21206 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21207 pointer, we must eliminate FRAME_POINTER_REGNUM into
21208 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21209 ARG_POINTER_REGNUM. */
21211 bool
21212 arm_can_eliminate (const int from, const int to)
21214 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21215 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21216 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21217 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21218 true);
21221 /* Emit RTL to save coprocessor registers on function entry. Returns the
21222 number of bytes pushed. */
21224 static int
21225 arm_save_coproc_regs(void)
21227 int saved_size = 0;
21228 unsigned reg;
21229 unsigned start_reg;
21230 rtx insn;
21232 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21233 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21235 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21236 insn = gen_rtx_MEM (V2SImode, insn);
21237 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21238 RTX_FRAME_RELATED_P (insn) = 1;
21239 saved_size += 8;
21242 if (TARGET_HARD_FLOAT)
21244 start_reg = FIRST_VFP_REGNUM;
21246 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21248 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21249 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21251 if (start_reg != reg)
21252 saved_size += vfp_emit_fstmd (start_reg,
21253 (reg - start_reg) / 2);
21254 start_reg = reg + 2;
21257 if (start_reg != reg)
21258 saved_size += vfp_emit_fstmd (start_reg,
21259 (reg - start_reg) / 2);
21261 return saved_size;
21265 /* Set the Thumb frame pointer from the stack pointer. */
21267 static void
21268 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21270 HOST_WIDE_INT amount;
21271 rtx insn, dwarf;
21273 amount = offsets->outgoing_args - offsets->locals_base;
21274 if (amount < 1024)
21275 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21276 stack_pointer_rtx, GEN_INT (amount)));
21277 else
21279 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21280 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21281 expects the first two operands to be the same. */
21282 if (TARGET_THUMB2)
21284 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21285 stack_pointer_rtx,
21286 hard_frame_pointer_rtx));
21288 else
21290 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21291 hard_frame_pointer_rtx,
21292 stack_pointer_rtx));
21294 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21295 plus_constant (Pmode, stack_pointer_rtx, amount));
21296 RTX_FRAME_RELATED_P (dwarf) = 1;
21297 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21300 RTX_FRAME_RELATED_P (insn) = 1;
21303 struct scratch_reg {
21304 rtx reg;
21305 bool saved;
21308 /* Return a short-lived scratch register for use as a 2nd scratch register on
21309 function entry after the registers are saved in the prologue. This register
21310 must be released by means of release_scratch_register_on_entry. IP is not
21311 considered since it is always used as the 1st scratch register if available.
21313 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21314 mask of live registers. */
21316 static void
21317 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21318 unsigned long live_regs)
21320 int regno = -1;
21322 sr->saved = false;
21324 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21325 regno = LR_REGNUM;
21326 else
21328 unsigned int i;
21330 for (i = 4; i < 11; i++)
21331 if (regno1 != i && (live_regs & (1 << i)) != 0)
21333 regno = i;
21334 break;
21337 if (regno < 0)
21339 /* If IP is used as the 1st scratch register for a nested function,
21340 then either r3 wasn't available or is used to preserve IP. */
21341 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21342 regno1 = 3;
21343 regno = (regno1 == 3 ? 2 : 3);
21344 sr->saved
21345 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21346 regno);
21350 sr->reg = gen_rtx_REG (SImode, regno);
21351 if (sr->saved)
21353 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21354 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21355 rtx x = gen_rtx_SET (stack_pointer_rtx,
21356 plus_constant (Pmode, stack_pointer_rtx, -4));
21357 RTX_FRAME_RELATED_P (insn) = 1;
21358 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21362 /* Release a scratch register obtained from the preceding function. */
21364 static void
21365 release_scratch_register_on_entry (struct scratch_reg *sr)
21367 if (sr->saved)
21369 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21370 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21371 rtx x = gen_rtx_SET (stack_pointer_rtx,
21372 plus_constant (Pmode, stack_pointer_rtx, 4));
21373 RTX_FRAME_RELATED_P (insn) = 1;
21374 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21378 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21380 #if PROBE_INTERVAL > 4096
21381 #error Cannot use indexed addressing mode for stack probing
21382 #endif
21384 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21385 inclusive. These are offsets from the current stack pointer. REGNO1
21386 is the index number of the 1st scratch register and LIVE_REGS is the
21387 mask of live registers. */
21389 static void
21390 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21391 unsigned int regno1, unsigned long live_regs)
21393 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21395 /* See if we have a constant small number of probes to generate. If so,
21396 that's the easy case. */
21397 if (size <= PROBE_INTERVAL)
21399 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21400 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21401 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21404 /* The run-time loop is made up of 10 insns in the generic case while the
21405 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21406 else if (size <= 5 * PROBE_INTERVAL)
21408 HOST_WIDE_INT i, rem;
21410 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21411 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21412 emit_stack_probe (reg1);
21414 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21415 it exceeds SIZE. If only two probes are needed, this will not
21416 generate any code. Then probe at FIRST + SIZE. */
21417 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21419 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21420 emit_stack_probe (reg1);
21423 rem = size - (i - PROBE_INTERVAL);
21424 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21426 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21427 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21429 else
21430 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21433 /* Otherwise, do the same as above, but in a loop. Note that we must be
21434 extra careful with variables wrapping around because we might be at
21435 the very top (or the very bottom) of the address space and we have
21436 to be able to handle this case properly; in particular, we use an
21437 equality test for the loop condition. */
21438 else
21440 HOST_WIDE_INT rounded_size;
21441 struct scratch_reg sr;
21443 get_scratch_register_on_entry (&sr, regno1, live_regs);
21445 emit_move_insn (reg1, GEN_INT (first));
21448 /* Step 1: round SIZE to the previous multiple of the interval. */
21450 rounded_size = size & -PROBE_INTERVAL;
21451 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21454 /* Step 2: compute initial and final value of the loop counter. */
21456 /* TEST_ADDR = SP + FIRST. */
21457 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21459 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21460 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21463 /* Step 3: the loop
21467 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21468 probe at TEST_ADDR
21470 while (TEST_ADDR != LAST_ADDR)
21472 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21473 until it is equal to ROUNDED_SIZE. */
21475 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21478 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21479 that SIZE is equal to ROUNDED_SIZE. */
21481 if (size != rounded_size)
21483 HOST_WIDE_INT rem = size - rounded_size;
21485 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21487 emit_set_insn (sr.reg,
21488 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21489 emit_stack_probe (plus_constant (Pmode, sr.reg,
21490 PROBE_INTERVAL - rem));
21492 else
21493 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21496 release_scratch_register_on_entry (&sr);
21499 /* Make sure nothing is scheduled before we are done. */
21500 emit_insn (gen_blockage ());
21503 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21504 absolute addresses. */
21506 const char *
21507 output_probe_stack_range (rtx reg1, rtx reg2)
21509 static int labelno = 0;
21510 char loop_lab[32];
21511 rtx xops[2];
21513 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21515 /* Loop. */
21516 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21518 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21519 xops[0] = reg1;
21520 xops[1] = GEN_INT (PROBE_INTERVAL);
21521 output_asm_insn ("sub\t%0, %0, %1", xops);
21523 /* Probe at TEST_ADDR. */
21524 output_asm_insn ("str\tr0, [%0, #0]", xops);
21526 /* Test if TEST_ADDR == LAST_ADDR. */
21527 xops[1] = reg2;
21528 output_asm_insn ("cmp\t%0, %1", xops);
21530 /* Branch. */
21531 fputs ("\tbne\t", asm_out_file);
21532 assemble_name_raw (asm_out_file, loop_lab);
21533 fputc ('\n', asm_out_file);
21535 return "";
21538 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21539 function. */
21540 void
21541 arm_expand_prologue (void)
21543 rtx amount;
21544 rtx insn;
21545 rtx ip_rtx;
21546 unsigned long live_regs_mask;
21547 unsigned long func_type;
21548 int fp_offset = 0;
21549 int saved_pretend_args = 0;
21550 int saved_regs = 0;
21551 unsigned HOST_WIDE_INT args_to_push;
21552 HOST_WIDE_INT size;
21553 arm_stack_offsets *offsets;
21554 bool clobber_ip;
21556 func_type = arm_current_func_type ();
21558 /* Naked functions don't have prologues. */
21559 if (IS_NAKED (func_type))
21561 if (flag_stack_usage_info)
21562 current_function_static_stack_size = 0;
21563 return;
21566 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21567 args_to_push = crtl->args.pretend_args_size;
21569 /* Compute which register we will have to save onto the stack. */
21570 offsets = arm_get_frame_offsets ();
21571 live_regs_mask = offsets->saved_regs_mask;
21573 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21575 if (IS_STACKALIGN (func_type))
21577 rtx r0, r1;
21579 /* Handle a word-aligned stack pointer. We generate the following:
21581 mov r0, sp
21582 bic r1, r0, #7
21583 mov sp, r1
21584 <save and restore r0 in normal prologue/epilogue>
21585 mov sp, r0
21586 bx lr
21588 The unwinder doesn't need to know about the stack realignment.
21589 Just tell it we saved SP in r0. */
21590 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21592 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21593 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21595 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21596 RTX_FRAME_RELATED_P (insn) = 1;
21597 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21599 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21601 /* ??? The CFA changes here, which may cause GDB to conclude that it
21602 has entered a different function. That said, the unwind info is
21603 correct, individually, before and after this instruction because
21604 we've described the save of SP, which will override the default
21605 handling of SP as restoring from the CFA. */
21606 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21609 /* The static chain register is the same as the IP register. If it is
21610 clobbered when creating the frame, we need to save and restore it. */
21611 clobber_ip = IS_NESTED (func_type)
21612 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21613 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21614 && !df_regs_ever_live_p (LR_REGNUM)
21615 && arm_r3_live_at_start_p ()));
21617 /* Find somewhere to store IP whilst the frame is being created.
21618 We try the following places in order:
21620 1. The last argument register r3 if it is available.
21621 2. A slot on the stack above the frame if there are no
21622 arguments to push onto the stack.
21623 3. Register r3 again, after pushing the argument registers
21624 onto the stack, if this is a varargs function.
21625 4. The last slot on the stack created for the arguments to
21626 push, if this isn't a varargs function.
21628 Note - we only need to tell the dwarf2 backend about the SP
21629 adjustment in the second variant; the static chain register
21630 doesn't need to be unwound, as it doesn't contain a value
21631 inherited from the caller. */
21632 if (clobber_ip)
21634 if (!arm_r3_live_at_start_p ())
21635 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21636 else if (args_to_push == 0)
21638 rtx addr, dwarf;
21640 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21641 saved_regs += 4;
21643 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21644 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21645 fp_offset = 4;
21647 /* Just tell the dwarf backend that we adjusted SP. */
21648 dwarf = gen_rtx_SET (stack_pointer_rtx,
21649 plus_constant (Pmode, stack_pointer_rtx,
21650 -fp_offset));
21651 RTX_FRAME_RELATED_P (insn) = 1;
21652 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21654 else
21656 /* Store the args on the stack. */
21657 if (cfun->machine->uses_anonymous_args)
21659 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21660 (0xf0 >> (args_to_push / 4)) & 0xf);
21661 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21662 saved_pretend_args = 1;
21664 else
21666 rtx addr, dwarf;
21668 if (args_to_push == 4)
21669 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21670 else
21671 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21672 plus_constant (Pmode,
21673 stack_pointer_rtx,
21674 -args_to_push));
21676 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21678 /* Just tell the dwarf backend that we adjusted SP. */
21679 dwarf = gen_rtx_SET (stack_pointer_rtx,
21680 plus_constant (Pmode, stack_pointer_rtx,
21681 -args_to_push));
21682 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21685 RTX_FRAME_RELATED_P (insn) = 1;
21686 fp_offset = args_to_push;
21687 args_to_push = 0;
21691 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21693 if (IS_INTERRUPT (func_type))
21695 /* Interrupt functions must not corrupt any registers.
21696 Creating a frame pointer however, corrupts the IP
21697 register, so we must push it first. */
21698 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21700 /* Do not set RTX_FRAME_RELATED_P on this insn.
21701 The dwarf stack unwinding code only wants to see one
21702 stack decrement per function, and this is not it. If
21703 this instruction is labeled as being part of the frame
21704 creation sequence then dwarf2out_frame_debug_expr will
21705 die when it encounters the assignment of IP to FP
21706 later on, since the use of SP here establishes SP as
21707 the CFA register and not IP.
21709 Anyway this instruction is not really part of the stack
21710 frame creation although it is part of the prologue. */
21713 insn = emit_set_insn (ip_rtx,
21714 plus_constant (Pmode, stack_pointer_rtx,
21715 fp_offset));
21716 RTX_FRAME_RELATED_P (insn) = 1;
21719 if (args_to_push)
21721 /* Push the argument registers, or reserve space for them. */
21722 if (cfun->machine->uses_anonymous_args)
21723 insn = emit_multi_reg_push
21724 ((0xf0 >> (args_to_push / 4)) & 0xf,
21725 (0xf0 >> (args_to_push / 4)) & 0xf);
21726 else
21727 insn = emit_insn
21728 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21729 GEN_INT (- args_to_push)));
21730 RTX_FRAME_RELATED_P (insn) = 1;
21733 /* If this is an interrupt service routine, and the link register
21734 is going to be pushed, and we're not generating extra
21735 push of IP (needed when frame is needed and frame layout if apcs),
21736 subtracting four from LR now will mean that the function return
21737 can be done with a single instruction. */
21738 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21739 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21740 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21741 && TARGET_ARM)
21743 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21745 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21748 if (live_regs_mask)
21750 unsigned long dwarf_regs_mask = live_regs_mask;
21752 saved_regs += bit_count (live_regs_mask) * 4;
21753 if (optimize_size && !frame_pointer_needed
21754 && saved_regs == offsets->saved_regs - offsets->saved_args)
21756 /* If no coprocessor registers are being pushed and we don't have
21757 to worry about a frame pointer then push extra registers to
21758 create the stack frame. This is done is a way that does not
21759 alter the frame layout, so is independent of the epilogue. */
21760 int n;
21761 int frame;
21762 n = 0;
21763 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21764 n++;
21765 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21766 if (frame && n * 4 >= frame)
21768 n = frame / 4;
21769 live_regs_mask |= (1 << n) - 1;
21770 saved_regs += frame;
21774 if (TARGET_LDRD
21775 && current_tune->prefer_ldrd_strd
21776 && !optimize_function_for_size_p (cfun))
21778 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21779 if (TARGET_THUMB2)
21780 thumb2_emit_strd_push (live_regs_mask);
21781 else if (TARGET_ARM
21782 && !TARGET_APCS_FRAME
21783 && !IS_INTERRUPT (func_type))
21784 arm_emit_strd_push (live_regs_mask);
21785 else
21787 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21788 RTX_FRAME_RELATED_P (insn) = 1;
21791 else
21793 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21794 RTX_FRAME_RELATED_P (insn) = 1;
21798 if (! IS_VOLATILE (func_type))
21799 saved_regs += arm_save_coproc_regs ();
21801 if (frame_pointer_needed && TARGET_ARM)
21803 /* Create the new frame pointer. */
21804 if (TARGET_APCS_FRAME)
21806 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21807 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21808 RTX_FRAME_RELATED_P (insn) = 1;
21810 else
21812 insn = GEN_INT (saved_regs - (4 + fp_offset));
21813 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21814 stack_pointer_rtx, insn));
21815 RTX_FRAME_RELATED_P (insn) = 1;
21819 size = offsets->outgoing_args - offsets->saved_args;
21820 if (flag_stack_usage_info)
21821 current_function_static_stack_size = size;
21823 /* If this isn't an interrupt service routine and we have a frame, then do
21824 stack checking. We use IP as the first scratch register, except for the
21825 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21826 if (!IS_INTERRUPT (func_type)
21827 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21829 unsigned int regno;
21831 if (!IS_NESTED (func_type) || clobber_ip)
21832 regno = IP_REGNUM;
21833 else if (df_regs_ever_live_p (LR_REGNUM))
21834 regno = LR_REGNUM;
21835 else
21836 regno = 3;
21838 if (crtl->is_leaf && !cfun->calls_alloca)
21840 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21841 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21842 size - STACK_CHECK_PROTECT,
21843 regno, live_regs_mask);
21845 else if (size > 0)
21846 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21847 regno, live_regs_mask);
21850 /* Recover the static chain register. */
21851 if (clobber_ip)
21853 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21854 insn = gen_rtx_REG (SImode, 3);
21855 else
21857 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21858 insn = gen_frame_mem (SImode, insn);
21860 emit_set_insn (ip_rtx, insn);
21861 emit_insn (gen_force_register_use (ip_rtx));
21864 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21866 /* This add can produce multiple insns for a large constant, so we
21867 need to get tricky. */
21868 rtx_insn *last = get_last_insn ();
21870 amount = GEN_INT (offsets->saved_args + saved_regs
21871 - offsets->outgoing_args);
21873 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21874 amount));
21877 last = last ? NEXT_INSN (last) : get_insns ();
21878 RTX_FRAME_RELATED_P (last) = 1;
21880 while (last != insn);
21882 /* If the frame pointer is needed, emit a special barrier that
21883 will prevent the scheduler from moving stores to the frame
21884 before the stack adjustment. */
21885 if (frame_pointer_needed)
21886 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21887 hard_frame_pointer_rtx));
21891 if (frame_pointer_needed && TARGET_THUMB2)
21892 thumb_set_frame_pointer (offsets);
21894 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21896 unsigned long mask;
21898 mask = live_regs_mask;
21899 mask &= THUMB2_WORK_REGS;
21900 if (!IS_NESTED (func_type))
21901 mask |= (1 << IP_REGNUM);
21902 arm_load_pic_register (mask);
21905 /* If we are profiling, make sure no instructions are scheduled before
21906 the call to mcount. Similarly if the user has requested no
21907 scheduling in the prolog. Similarly if we want non-call exceptions
21908 using the EABI unwinder, to prevent faulting instructions from being
21909 swapped with a stack adjustment. */
21910 if (crtl->profile || !TARGET_SCHED_PROLOG
21911 || (arm_except_unwind_info (&global_options) == UI_TARGET
21912 && cfun->can_throw_non_call_exceptions))
21913 emit_insn (gen_blockage ());
21915 /* If the link register is being kept alive, with the return address in it,
21916 then make sure that it does not get reused by the ce2 pass. */
21917 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21918 cfun->machine->lr_save_eliminated = 1;
21921 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21922 static void
21923 arm_print_condition (FILE *stream)
21925 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21927 /* Branch conversion is not implemented for Thumb-2. */
21928 if (TARGET_THUMB)
21930 output_operand_lossage ("predicated Thumb instruction");
21931 return;
21933 if (current_insn_predicate != NULL)
21935 output_operand_lossage
21936 ("predicated instruction in conditional sequence");
21937 return;
21940 fputs (arm_condition_codes[arm_current_cc], stream);
21942 else if (current_insn_predicate)
21944 enum arm_cond_code code;
21946 if (TARGET_THUMB1)
21948 output_operand_lossage ("predicated Thumb instruction");
21949 return;
21952 code = get_arm_condition_code (current_insn_predicate);
21953 fputs (arm_condition_codes[code], stream);
21958 /* Globally reserved letters: acln
21959 Puncutation letters currently used: @_|?().!#
21960 Lower case letters currently used: bcdefhimpqtvwxyz
21961 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21962 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21964 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21966 If CODE is 'd', then the X is a condition operand and the instruction
21967 should only be executed if the condition is true.
21968 if CODE is 'D', then the X is a condition operand and the instruction
21969 should only be executed if the condition is false: however, if the mode
21970 of the comparison is CCFPEmode, then always execute the instruction -- we
21971 do this because in these circumstances !GE does not necessarily imply LT;
21972 in these cases the instruction pattern will take care to make sure that
21973 an instruction containing %d will follow, thereby undoing the effects of
21974 doing this instruction unconditionally.
21975 If CODE is 'N' then X is a floating point operand that must be negated
21976 before output.
21977 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21978 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21979 static void
21980 arm_print_operand (FILE *stream, rtx x, int code)
21982 switch (code)
21984 case '@':
21985 fputs (ASM_COMMENT_START, stream);
21986 return;
21988 case '_':
21989 fputs (user_label_prefix, stream);
21990 return;
21992 case '|':
21993 fputs (REGISTER_PREFIX, stream);
21994 return;
21996 case '?':
21997 arm_print_condition (stream);
21998 return;
22000 case '.':
22001 /* The current condition code for a condition code setting instruction.
22002 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22003 fputc('s', stream);
22004 arm_print_condition (stream);
22005 return;
22007 case '!':
22008 /* If the instruction is conditionally executed then print
22009 the current condition code, otherwise print 's'. */
22010 gcc_assert (TARGET_THUMB2);
22011 if (current_insn_predicate)
22012 arm_print_condition (stream);
22013 else
22014 fputc('s', stream);
22015 break;
22017 /* %# is a "break" sequence. It doesn't output anything, but is used to
22018 separate e.g. operand numbers from following text, if that text consists
22019 of further digits which we don't want to be part of the operand
22020 number. */
22021 case '#':
22022 return;
22024 case 'N':
22026 REAL_VALUE_TYPE r;
22027 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22028 fprintf (stream, "%s", fp_const_from_val (&r));
22030 return;
22032 /* An integer or symbol address without a preceding # sign. */
22033 case 'c':
22034 switch (GET_CODE (x))
22036 case CONST_INT:
22037 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22038 break;
22040 case SYMBOL_REF:
22041 output_addr_const (stream, x);
22042 break;
22044 case CONST:
22045 if (GET_CODE (XEXP (x, 0)) == PLUS
22046 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22048 output_addr_const (stream, x);
22049 break;
22051 /* Fall through. */
22053 default:
22054 output_operand_lossage ("Unsupported operand for code '%c'", code);
22056 return;
22058 /* An integer that we want to print in HEX. */
22059 case 'x':
22060 switch (GET_CODE (x))
22062 case CONST_INT:
22063 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22064 break;
22066 default:
22067 output_operand_lossage ("Unsupported operand for code '%c'", code);
22069 return;
22071 case 'B':
22072 if (CONST_INT_P (x))
22074 HOST_WIDE_INT val;
22075 val = ARM_SIGN_EXTEND (~INTVAL (x));
22076 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22078 else
22080 putc ('~', stream);
22081 output_addr_const (stream, x);
22083 return;
22085 case 'b':
22086 /* Print the log2 of a CONST_INT. */
22088 HOST_WIDE_INT val;
22090 if (!CONST_INT_P (x)
22091 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22092 output_operand_lossage ("Unsupported operand for code '%c'", code);
22093 else
22094 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22096 return;
22098 case 'L':
22099 /* The low 16 bits of an immediate constant. */
22100 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22101 return;
22103 case 'i':
22104 fprintf (stream, "%s", arithmetic_instr (x, 1));
22105 return;
22107 case 'I':
22108 fprintf (stream, "%s", arithmetic_instr (x, 0));
22109 return;
22111 case 'S':
22113 HOST_WIDE_INT val;
22114 const char *shift;
22116 shift = shift_op (x, &val);
22118 if (shift)
22120 fprintf (stream, ", %s ", shift);
22121 if (val == -1)
22122 arm_print_operand (stream, XEXP (x, 1), 0);
22123 else
22124 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22127 return;
22129 /* An explanation of the 'Q', 'R' and 'H' register operands:
22131 In a pair of registers containing a DI or DF value the 'Q'
22132 operand returns the register number of the register containing
22133 the least significant part of the value. The 'R' operand returns
22134 the register number of the register containing the most
22135 significant part of the value.
22137 The 'H' operand returns the higher of the two register numbers.
22138 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22139 same as the 'Q' operand, since the most significant part of the
22140 value is held in the lower number register. The reverse is true
22141 on systems where WORDS_BIG_ENDIAN is false.
22143 The purpose of these operands is to distinguish between cases
22144 where the endian-ness of the values is important (for example
22145 when they are added together), and cases where the endian-ness
22146 is irrelevant, but the order of register operations is important.
22147 For example when loading a value from memory into a register
22148 pair, the endian-ness does not matter. Provided that the value
22149 from the lower memory address is put into the lower numbered
22150 register, and the value from the higher address is put into the
22151 higher numbered register, the load will work regardless of whether
22152 the value being loaded is big-wordian or little-wordian. The
22153 order of the two register loads can matter however, if the address
22154 of the memory location is actually held in one of the registers
22155 being overwritten by the load.
22157 The 'Q' and 'R' constraints are also available for 64-bit
22158 constants. */
22159 case 'Q':
22160 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22162 rtx part = gen_lowpart (SImode, x);
22163 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22164 return;
22167 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22169 output_operand_lossage ("invalid operand for code '%c'", code);
22170 return;
22173 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22174 return;
22176 case 'R':
22177 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22179 machine_mode mode = GET_MODE (x);
22180 rtx part;
22182 if (mode == VOIDmode)
22183 mode = DImode;
22184 part = gen_highpart_mode (SImode, mode, x);
22185 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22186 return;
22189 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22191 output_operand_lossage ("invalid operand for code '%c'", code);
22192 return;
22195 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22196 return;
22198 case 'H':
22199 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22201 output_operand_lossage ("invalid operand for code '%c'", code);
22202 return;
22205 asm_fprintf (stream, "%r", REGNO (x) + 1);
22206 return;
22208 case 'J':
22209 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22211 output_operand_lossage ("invalid operand for code '%c'", code);
22212 return;
22215 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22216 return;
22218 case 'K':
22219 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22221 output_operand_lossage ("invalid operand for code '%c'", code);
22222 return;
22225 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22226 return;
22228 case 'm':
22229 asm_fprintf (stream, "%r",
22230 REG_P (XEXP (x, 0))
22231 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22232 return;
22234 case 'M':
22235 asm_fprintf (stream, "{%r-%r}",
22236 REGNO (x),
22237 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22238 return;
22240 /* Like 'M', but writing doubleword vector registers, for use by Neon
22241 insns. */
22242 case 'h':
22244 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22245 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22246 if (numregs == 1)
22247 asm_fprintf (stream, "{d%d}", regno);
22248 else
22249 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22251 return;
22253 case 'd':
22254 /* CONST_TRUE_RTX means always -- that's the default. */
22255 if (x == const_true_rtx)
22256 return;
22258 if (!COMPARISON_P (x))
22260 output_operand_lossage ("invalid operand for code '%c'", code);
22261 return;
22264 fputs (arm_condition_codes[get_arm_condition_code (x)],
22265 stream);
22266 return;
22268 case 'D':
22269 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22270 want to do that. */
22271 if (x == const_true_rtx)
22273 output_operand_lossage ("instruction never executed");
22274 return;
22276 if (!COMPARISON_P (x))
22278 output_operand_lossage ("invalid operand for code '%c'", code);
22279 return;
22282 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22283 (get_arm_condition_code (x))],
22284 stream);
22285 return;
22287 case 's':
22288 case 'V':
22289 case 'W':
22290 case 'X':
22291 case 'Y':
22292 case 'Z':
22293 /* Former Maverick support, removed after GCC-4.7. */
22294 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22295 return;
22297 case 'U':
22298 if (!REG_P (x)
22299 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22300 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22301 /* Bad value for wCG register number. */
22303 output_operand_lossage ("invalid operand for code '%c'", code);
22304 return;
22307 else
22308 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22309 return;
22311 /* Print an iWMMXt control register name. */
22312 case 'w':
22313 if (!CONST_INT_P (x)
22314 || INTVAL (x) < 0
22315 || INTVAL (x) >= 16)
22316 /* Bad value for wC register number. */
22318 output_operand_lossage ("invalid operand for code '%c'", code);
22319 return;
22322 else
22324 static const char * wc_reg_names [16] =
22326 "wCID", "wCon", "wCSSF", "wCASF",
22327 "wC4", "wC5", "wC6", "wC7",
22328 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22329 "wC12", "wC13", "wC14", "wC15"
22332 fputs (wc_reg_names [INTVAL (x)], stream);
22334 return;
22336 /* Print the high single-precision register of a VFP double-precision
22337 register. */
22338 case 'p':
22340 machine_mode mode = GET_MODE (x);
22341 int regno;
22343 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22345 output_operand_lossage ("invalid operand for code '%c'", code);
22346 return;
22349 regno = REGNO (x);
22350 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22352 output_operand_lossage ("invalid operand for code '%c'", code);
22353 return;
22356 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22358 return;
22360 /* Print a VFP/Neon double precision or quad precision register name. */
22361 case 'P':
22362 case 'q':
22364 machine_mode mode = GET_MODE (x);
22365 int is_quad = (code == 'q');
22366 int regno;
22368 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22370 output_operand_lossage ("invalid operand for code '%c'", code);
22371 return;
22374 if (!REG_P (x)
22375 || !IS_VFP_REGNUM (REGNO (x)))
22377 output_operand_lossage ("invalid operand for code '%c'", code);
22378 return;
22381 regno = REGNO (x);
22382 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22383 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22385 output_operand_lossage ("invalid operand for code '%c'", code);
22386 return;
22389 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22390 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22392 return;
22394 /* These two codes print the low/high doubleword register of a Neon quad
22395 register, respectively. For pair-structure types, can also print
22396 low/high quadword registers. */
22397 case 'e':
22398 case 'f':
22400 machine_mode mode = GET_MODE (x);
22401 int regno;
22403 if ((GET_MODE_SIZE (mode) != 16
22404 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22406 output_operand_lossage ("invalid operand for code '%c'", code);
22407 return;
22410 regno = REGNO (x);
22411 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22413 output_operand_lossage ("invalid operand for code '%c'", code);
22414 return;
22417 if (GET_MODE_SIZE (mode) == 16)
22418 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22419 + (code == 'f' ? 1 : 0));
22420 else
22421 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22422 + (code == 'f' ? 1 : 0));
22424 return;
22426 /* Print a VFPv3 floating-point constant, represented as an integer
22427 index. */
22428 case 'G':
22430 int index = vfp3_const_double_index (x);
22431 gcc_assert (index != -1);
22432 fprintf (stream, "%d", index);
22434 return;
22436 /* Print bits representing opcode features for Neon.
22438 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22439 and polynomials as unsigned.
22441 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22443 Bit 2 is 1 for rounding functions, 0 otherwise. */
22445 /* Identify the type as 's', 'u', 'p' or 'f'. */
22446 case 'T':
22448 HOST_WIDE_INT bits = INTVAL (x);
22449 fputc ("uspf"[bits & 3], stream);
22451 return;
22453 /* Likewise, but signed and unsigned integers are both 'i'. */
22454 case 'F':
22456 HOST_WIDE_INT bits = INTVAL (x);
22457 fputc ("iipf"[bits & 3], stream);
22459 return;
22461 /* As for 'T', but emit 'u' instead of 'p'. */
22462 case 't':
22464 HOST_WIDE_INT bits = INTVAL (x);
22465 fputc ("usuf"[bits & 3], stream);
22467 return;
22469 /* Bit 2: rounding (vs none). */
22470 case 'O':
22472 HOST_WIDE_INT bits = INTVAL (x);
22473 fputs ((bits & 4) != 0 ? "r" : "", stream);
22475 return;
22477 /* Memory operand for vld1/vst1 instruction. */
22478 case 'A':
22480 rtx addr;
22481 bool postinc = FALSE;
22482 rtx postinc_reg = NULL;
22483 unsigned align, memsize, align_bits;
22485 gcc_assert (MEM_P (x));
22486 addr = XEXP (x, 0);
22487 if (GET_CODE (addr) == POST_INC)
22489 postinc = 1;
22490 addr = XEXP (addr, 0);
22492 if (GET_CODE (addr) == POST_MODIFY)
22494 postinc_reg = XEXP( XEXP (addr, 1), 1);
22495 addr = XEXP (addr, 0);
22497 asm_fprintf (stream, "[%r", REGNO (addr));
22499 /* We know the alignment of this access, so we can emit a hint in the
22500 instruction (for some alignments) as an aid to the memory subsystem
22501 of the target. */
22502 align = MEM_ALIGN (x) >> 3;
22503 memsize = MEM_SIZE (x);
22505 /* Only certain alignment specifiers are supported by the hardware. */
22506 if (memsize == 32 && (align % 32) == 0)
22507 align_bits = 256;
22508 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22509 align_bits = 128;
22510 else if (memsize >= 8 && (align % 8) == 0)
22511 align_bits = 64;
22512 else
22513 align_bits = 0;
22515 if (align_bits != 0)
22516 asm_fprintf (stream, ":%d", align_bits);
22518 asm_fprintf (stream, "]");
22520 if (postinc)
22521 fputs("!", stream);
22522 if (postinc_reg)
22523 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22525 return;
22527 case 'C':
22529 rtx addr;
22531 gcc_assert (MEM_P (x));
22532 addr = XEXP (x, 0);
22533 gcc_assert (REG_P (addr));
22534 asm_fprintf (stream, "[%r]", REGNO (addr));
22536 return;
22538 /* Translate an S register number into a D register number and element index. */
22539 case 'y':
22541 machine_mode mode = GET_MODE (x);
22542 int regno;
22544 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22546 output_operand_lossage ("invalid operand for code '%c'", code);
22547 return;
22550 regno = REGNO (x);
22551 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22553 output_operand_lossage ("invalid operand for code '%c'", code);
22554 return;
22557 regno = regno - FIRST_VFP_REGNUM;
22558 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22560 return;
22562 case 'v':
22563 gcc_assert (CONST_DOUBLE_P (x));
22564 int result;
22565 result = vfp3_const_double_for_fract_bits (x);
22566 if (result == 0)
22567 result = vfp3_const_double_for_bits (x);
22568 fprintf (stream, "#%d", result);
22569 return;
22571 /* Register specifier for vld1.16/vst1.16. Translate the S register
22572 number into a D register number and element index. */
22573 case 'z':
22575 machine_mode mode = GET_MODE (x);
22576 int regno;
22578 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22580 output_operand_lossage ("invalid operand for code '%c'", code);
22581 return;
22584 regno = REGNO (x);
22585 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22587 output_operand_lossage ("invalid operand for code '%c'", code);
22588 return;
22591 regno = regno - FIRST_VFP_REGNUM;
22592 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22594 return;
22596 default:
22597 if (x == 0)
22599 output_operand_lossage ("missing operand");
22600 return;
22603 switch (GET_CODE (x))
22605 case REG:
22606 asm_fprintf (stream, "%r", REGNO (x));
22607 break;
22609 case MEM:
22610 output_address (GET_MODE (x), XEXP (x, 0));
22611 break;
22613 case CONST_DOUBLE:
22615 char fpstr[20];
22616 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22617 sizeof (fpstr), 0, 1);
22618 fprintf (stream, "#%s", fpstr);
22620 break;
22622 default:
22623 gcc_assert (GET_CODE (x) != NEG);
22624 fputc ('#', stream);
22625 if (GET_CODE (x) == HIGH)
22627 fputs (":lower16:", stream);
22628 x = XEXP (x, 0);
22631 output_addr_const (stream, x);
22632 break;
22637 /* Target hook for printing a memory address. */
22638 static void
22639 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22641 if (TARGET_32BIT)
22643 int is_minus = GET_CODE (x) == MINUS;
22645 if (REG_P (x))
22646 asm_fprintf (stream, "[%r]", REGNO (x));
22647 else if (GET_CODE (x) == PLUS || is_minus)
22649 rtx base = XEXP (x, 0);
22650 rtx index = XEXP (x, 1);
22651 HOST_WIDE_INT offset = 0;
22652 if (!REG_P (base)
22653 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22655 /* Ensure that BASE is a register. */
22656 /* (one of them must be). */
22657 /* Also ensure the SP is not used as in index register. */
22658 std::swap (base, index);
22660 switch (GET_CODE (index))
22662 case CONST_INT:
22663 offset = INTVAL (index);
22664 if (is_minus)
22665 offset = -offset;
22666 asm_fprintf (stream, "[%r, #%wd]",
22667 REGNO (base), offset);
22668 break;
22670 case REG:
22671 asm_fprintf (stream, "[%r, %s%r]",
22672 REGNO (base), is_minus ? "-" : "",
22673 REGNO (index));
22674 break;
22676 case MULT:
22677 case ASHIFTRT:
22678 case LSHIFTRT:
22679 case ASHIFT:
22680 case ROTATERT:
22682 asm_fprintf (stream, "[%r, %s%r",
22683 REGNO (base), is_minus ? "-" : "",
22684 REGNO (XEXP (index, 0)));
22685 arm_print_operand (stream, index, 'S');
22686 fputs ("]", stream);
22687 break;
22690 default:
22691 gcc_unreachable ();
22694 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22695 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22697 gcc_assert (REG_P (XEXP (x, 0)));
22699 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22700 asm_fprintf (stream, "[%r, #%s%d]!",
22701 REGNO (XEXP (x, 0)),
22702 GET_CODE (x) == PRE_DEC ? "-" : "",
22703 GET_MODE_SIZE (mode));
22704 else
22705 asm_fprintf (stream, "[%r], #%s%d",
22706 REGNO (XEXP (x, 0)),
22707 GET_CODE (x) == POST_DEC ? "-" : "",
22708 GET_MODE_SIZE (mode));
22710 else if (GET_CODE (x) == PRE_MODIFY)
22712 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22713 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22714 asm_fprintf (stream, "#%wd]!",
22715 INTVAL (XEXP (XEXP (x, 1), 1)));
22716 else
22717 asm_fprintf (stream, "%r]!",
22718 REGNO (XEXP (XEXP (x, 1), 1)));
22720 else if (GET_CODE (x) == POST_MODIFY)
22722 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22723 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22724 asm_fprintf (stream, "#%wd",
22725 INTVAL (XEXP (XEXP (x, 1), 1)));
22726 else
22727 asm_fprintf (stream, "%r",
22728 REGNO (XEXP (XEXP (x, 1), 1)));
22730 else output_addr_const (stream, x);
22732 else
22734 if (REG_P (x))
22735 asm_fprintf (stream, "[%r]", REGNO (x));
22736 else if (GET_CODE (x) == POST_INC)
22737 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22738 else if (GET_CODE (x) == PLUS)
22740 gcc_assert (REG_P (XEXP (x, 0)));
22741 if (CONST_INT_P (XEXP (x, 1)))
22742 asm_fprintf (stream, "[%r, #%wd]",
22743 REGNO (XEXP (x, 0)),
22744 INTVAL (XEXP (x, 1)));
22745 else
22746 asm_fprintf (stream, "[%r, %r]",
22747 REGNO (XEXP (x, 0)),
22748 REGNO (XEXP (x, 1)));
22750 else
22751 output_addr_const (stream, x);
22755 /* Target hook for indicating whether a punctuation character for
22756 TARGET_PRINT_OPERAND is valid. */
22757 static bool
22758 arm_print_operand_punct_valid_p (unsigned char code)
22760 return (code == '@' || code == '|' || code == '.'
22761 || code == '(' || code == ')' || code == '#'
22762 || (TARGET_32BIT && (code == '?'))
22763 || (TARGET_THUMB2 && (code == '!'))
22764 || (TARGET_THUMB && (code == '_')));
22767 /* Target hook for assembling integer objects. The ARM version needs to
22768 handle word-sized values specially. */
22769 static bool
22770 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22772 machine_mode mode;
22774 if (size == UNITS_PER_WORD && aligned_p)
22776 fputs ("\t.word\t", asm_out_file);
22777 output_addr_const (asm_out_file, x);
22779 /* Mark symbols as position independent. We only do this in the
22780 .text segment, not in the .data segment. */
22781 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22782 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22784 /* See legitimize_pic_address for an explanation of the
22785 TARGET_VXWORKS_RTP check. */
22786 if (!arm_pic_data_is_text_relative
22787 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22788 fputs ("(GOT)", asm_out_file);
22789 else
22790 fputs ("(GOTOFF)", asm_out_file);
22792 fputc ('\n', asm_out_file);
22793 return true;
22796 mode = GET_MODE (x);
22798 if (arm_vector_mode_supported_p (mode))
22800 int i, units;
22802 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22804 units = CONST_VECTOR_NUNITS (x);
22805 size = GET_MODE_UNIT_SIZE (mode);
22807 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22808 for (i = 0; i < units; i++)
22810 rtx elt = CONST_VECTOR_ELT (x, i);
22811 assemble_integer
22812 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22814 else
22815 for (i = 0; i < units; i++)
22817 rtx elt = CONST_VECTOR_ELT (x, i);
22818 assemble_real
22819 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22820 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22823 return true;
22826 return default_assemble_integer (x, size, aligned_p);
22829 static void
22830 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22832 section *s;
22834 if (!TARGET_AAPCS_BASED)
22836 (is_ctor ?
22837 default_named_section_asm_out_constructor
22838 : default_named_section_asm_out_destructor) (symbol, priority);
22839 return;
22842 /* Put these in the .init_array section, using a special relocation. */
22843 if (priority != DEFAULT_INIT_PRIORITY)
22845 char buf[18];
22846 sprintf (buf, "%s.%.5u",
22847 is_ctor ? ".init_array" : ".fini_array",
22848 priority);
22849 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22851 else if (is_ctor)
22852 s = ctors_section;
22853 else
22854 s = dtors_section;
22856 switch_to_section (s);
22857 assemble_align (POINTER_SIZE);
22858 fputs ("\t.word\t", asm_out_file);
22859 output_addr_const (asm_out_file, symbol);
22860 fputs ("(target1)\n", asm_out_file);
22863 /* Add a function to the list of static constructors. */
22865 static void
22866 arm_elf_asm_constructor (rtx symbol, int priority)
22868 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22871 /* Add a function to the list of static destructors. */
22873 static void
22874 arm_elf_asm_destructor (rtx symbol, int priority)
22876 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22879 /* A finite state machine takes care of noticing whether or not instructions
22880 can be conditionally executed, and thus decrease execution time and code
22881 size by deleting branch instructions. The fsm is controlled by
22882 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22884 /* The state of the fsm controlling condition codes are:
22885 0: normal, do nothing special
22886 1: make ASM_OUTPUT_OPCODE not output this instruction
22887 2: make ASM_OUTPUT_OPCODE not output this instruction
22888 3: make instructions conditional
22889 4: make instructions conditional
22891 State transitions (state->state by whom under condition):
22892 0 -> 1 final_prescan_insn if the `target' is a label
22893 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22894 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22895 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22896 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22897 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22898 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22899 (the target insn is arm_target_insn).
22901 If the jump clobbers the conditions then we use states 2 and 4.
22903 A similar thing can be done with conditional return insns.
22905 XXX In case the `target' is an unconditional branch, this conditionalising
22906 of the instructions always reduces code size, but not always execution
22907 time. But then, I want to reduce the code size to somewhere near what
22908 /bin/cc produces. */
22910 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22911 instructions. When a COND_EXEC instruction is seen the subsequent
22912 instructions are scanned so that multiple conditional instructions can be
22913 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22914 specify the length and true/false mask for the IT block. These will be
22915 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22917 /* Returns the index of the ARM condition code string in
22918 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22919 COMPARISON should be an rtx like `(eq (...) (...))'. */
22921 enum arm_cond_code
22922 maybe_get_arm_condition_code (rtx comparison)
22924 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22925 enum arm_cond_code code;
22926 enum rtx_code comp_code = GET_CODE (comparison);
22928 if (GET_MODE_CLASS (mode) != MODE_CC)
22929 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22930 XEXP (comparison, 1));
22932 switch (mode)
22934 case CC_DNEmode: code = ARM_NE; goto dominance;
22935 case CC_DEQmode: code = ARM_EQ; goto dominance;
22936 case CC_DGEmode: code = ARM_GE; goto dominance;
22937 case CC_DGTmode: code = ARM_GT; goto dominance;
22938 case CC_DLEmode: code = ARM_LE; goto dominance;
22939 case CC_DLTmode: code = ARM_LT; goto dominance;
22940 case CC_DGEUmode: code = ARM_CS; goto dominance;
22941 case CC_DGTUmode: code = ARM_HI; goto dominance;
22942 case CC_DLEUmode: code = ARM_LS; goto dominance;
22943 case CC_DLTUmode: code = ARM_CC;
22945 dominance:
22946 if (comp_code == EQ)
22947 return ARM_INVERSE_CONDITION_CODE (code);
22948 if (comp_code == NE)
22949 return code;
22950 return ARM_NV;
22952 case CC_NOOVmode:
22953 switch (comp_code)
22955 case NE: return ARM_NE;
22956 case EQ: return ARM_EQ;
22957 case GE: return ARM_PL;
22958 case LT: return ARM_MI;
22959 default: return ARM_NV;
22962 case CC_Zmode:
22963 switch (comp_code)
22965 case NE: return ARM_NE;
22966 case EQ: return ARM_EQ;
22967 default: return ARM_NV;
22970 case CC_Nmode:
22971 switch (comp_code)
22973 case NE: return ARM_MI;
22974 case EQ: return ARM_PL;
22975 default: return ARM_NV;
22978 case CCFPEmode:
22979 case CCFPmode:
22980 /* We can handle all cases except UNEQ and LTGT. */
22981 switch (comp_code)
22983 case GE: return ARM_GE;
22984 case GT: return ARM_GT;
22985 case LE: return ARM_LS;
22986 case LT: return ARM_MI;
22987 case NE: return ARM_NE;
22988 case EQ: return ARM_EQ;
22989 case ORDERED: return ARM_VC;
22990 case UNORDERED: return ARM_VS;
22991 case UNLT: return ARM_LT;
22992 case UNLE: return ARM_LE;
22993 case UNGT: return ARM_HI;
22994 case UNGE: return ARM_PL;
22995 /* UNEQ and LTGT do not have a representation. */
22996 case UNEQ: /* Fall through. */
22997 case LTGT: /* Fall through. */
22998 default: return ARM_NV;
23001 case CC_SWPmode:
23002 switch (comp_code)
23004 case NE: return ARM_NE;
23005 case EQ: return ARM_EQ;
23006 case GE: return ARM_LE;
23007 case GT: return ARM_LT;
23008 case LE: return ARM_GE;
23009 case LT: return ARM_GT;
23010 case GEU: return ARM_LS;
23011 case GTU: return ARM_CC;
23012 case LEU: return ARM_CS;
23013 case LTU: return ARM_HI;
23014 default: return ARM_NV;
23017 case CC_Cmode:
23018 switch (comp_code)
23020 case LTU: return ARM_CS;
23021 case GEU: return ARM_CC;
23022 case NE: return ARM_CS;
23023 case EQ: return ARM_CC;
23024 default: return ARM_NV;
23027 case CC_CZmode:
23028 switch (comp_code)
23030 case NE: return ARM_NE;
23031 case EQ: return ARM_EQ;
23032 case GEU: return ARM_CS;
23033 case GTU: return ARM_HI;
23034 case LEU: return ARM_LS;
23035 case LTU: return ARM_CC;
23036 default: return ARM_NV;
23039 case CC_NCVmode:
23040 switch (comp_code)
23042 case GE: return ARM_GE;
23043 case LT: return ARM_LT;
23044 case GEU: return ARM_CS;
23045 case LTU: return ARM_CC;
23046 default: return ARM_NV;
23049 case CC_Vmode:
23050 switch (comp_code)
23052 case NE: return ARM_VS;
23053 case EQ: return ARM_VC;
23054 default: return ARM_NV;
23057 case CCmode:
23058 switch (comp_code)
23060 case NE: return ARM_NE;
23061 case EQ: return ARM_EQ;
23062 case GE: return ARM_GE;
23063 case GT: return ARM_GT;
23064 case LE: return ARM_LE;
23065 case LT: return ARM_LT;
23066 case GEU: return ARM_CS;
23067 case GTU: return ARM_HI;
23068 case LEU: return ARM_LS;
23069 case LTU: return ARM_CC;
23070 default: return ARM_NV;
23073 default: gcc_unreachable ();
23077 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23078 static enum arm_cond_code
23079 get_arm_condition_code (rtx comparison)
23081 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23082 gcc_assert (code != ARM_NV);
23083 return code;
23086 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23087 instructions. */
23088 void
23089 thumb2_final_prescan_insn (rtx_insn *insn)
23091 rtx_insn *first_insn = insn;
23092 rtx body = PATTERN (insn);
23093 rtx predicate;
23094 enum arm_cond_code code;
23095 int n;
23096 int mask;
23097 int max;
23099 /* max_insns_skipped in the tune was already taken into account in the
23100 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23101 just emit the IT blocks as we can. It does not make sense to split
23102 the IT blocks. */
23103 max = MAX_INSN_PER_IT_BLOCK;
23105 /* Remove the previous insn from the count of insns to be output. */
23106 if (arm_condexec_count)
23107 arm_condexec_count--;
23109 /* Nothing to do if we are already inside a conditional block. */
23110 if (arm_condexec_count)
23111 return;
23113 if (GET_CODE (body) != COND_EXEC)
23114 return;
23116 /* Conditional jumps are implemented directly. */
23117 if (JUMP_P (insn))
23118 return;
23120 predicate = COND_EXEC_TEST (body);
23121 arm_current_cc = get_arm_condition_code (predicate);
23123 n = get_attr_ce_count (insn);
23124 arm_condexec_count = 1;
23125 arm_condexec_mask = (1 << n) - 1;
23126 arm_condexec_masklen = n;
23127 /* See if subsequent instructions can be combined into the same block. */
23128 for (;;)
23130 insn = next_nonnote_insn (insn);
23132 /* Jumping into the middle of an IT block is illegal, so a label or
23133 barrier terminates the block. */
23134 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23135 break;
23137 body = PATTERN (insn);
23138 /* USE and CLOBBER aren't really insns, so just skip them. */
23139 if (GET_CODE (body) == USE
23140 || GET_CODE (body) == CLOBBER)
23141 continue;
23143 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23144 if (GET_CODE (body) != COND_EXEC)
23145 break;
23146 /* Maximum number of conditionally executed instructions in a block. */
23147 n = get_attr_ce_count (insn);
23148 if (arm_condexec_masklen + n > max)
23149 break;
23151 predicate = COND_EXEC_TEST (body);
23152 code = get_arm_condition_code (predicate);
23153 mask = (1 << n) - 1;
23154 if (arm_current_cc == code)
23155 arm_condexec_mask |= (mask << arm_condexec_masklen);
23156 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23157 break;
23159 arm_condexec_count++;
23160 arm_condexec_masklen += n;
23162 /* A jump must be the last instruction in a conditional block. */
23163 if (JUMP_P (insn))
23164 break;
23166 /* Restore recog_data (getting the attributes of other insns can
23167 destroy this array, but final.c assumes that it remains intact
23168 across this call). */
23169 extract_constrain_insn_cached (first_insn);
23172 void
23173 arm_final_prescan_insn (rtx_insn *insn)
23175 /* BODY will hold the body of INSN. */
23176 rtx body = PATTERN (insn);
23178 /* This will be 1 if trying to repeat the trick, and things need to be
23179 reversed if it appears to fail. */
23180 int reverse = 0;
23182 /* If we start with a return insn, we only succeed if we find another one. */
23183 int seeking_return = 0;
23184 enum rtx_code return_code = UNKNOWN;
23186 /* START_INSN will hold the insn from where we start looking. This is the
23187 first insn after the following code_label if REVERSE is true. */
23188 rtx_insn *start_insn = insn;
23190 /* If in state 4, check if the target branch is reached, in order to
23191 change back to state 0. */
23192 if (arm_ccfsm_state == 4)
23194 if (insn == arm_target_insn)
23196 arm_target_insn = NULL;
23197 arm_ccfsm_state = 0;
23199 return;
23202 /* If in state 3, it is possible to repeat the trick, if this insn is an
23203 unconditional branch to a label, and immediately following this branch
23204 is the previous target label which is only used once, and the label this
23205 branch jumps to is not too far off. */
23206 if (arm_ccfsm_state == 3)
23208 if (simplejump_p (insn))
23210 start_insn = next_nonnote_insn (start_insn);
23211 if (BARRIER_P (start_insn))
23213 /* XXX Isn't this always a barrier? */
23214 start_insn = next_nonnote_insn (start_insn);
23216 if (LABEL_P (start_insn)
23217 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23218 && LABEL_NUSES (start_insn) == 1)
23219 reverse = TRUE;
23220 else
23221 return;
23223 else if (ANY_RETURN_P (body))
23225 start_insn = next_nonnote_insn (start_insn);
23226 if (BARRIER_P (start_insn))
23227 start_insn = next_nonnote_insn (start_insn);
23228 if (LABEL_P (start_insn)
23229 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23230 && LABEL_NUSES (start_insn) == 1)
23232 reverse = TRUE;
23233 seeking_return = 1;
23234 return_code = GET_CODE (body);
23236 else
23237 return;
23239 else
23240 return;
23243 gcc_assert (!arm_ccfsm_state || reverse);
23244 if (!JUMP_P (insn))
23245 return;
23247 /* This jump might be paralleled with a clobber of the condition codes
23248 the jump should always come first */
23249 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23250 body = XVECEXP (body, 0, 0);
23252 if (reverse
23253 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23254 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23256 int insns_skipped;
23257 int fail = FALSE, succeed = FALSE;
23258 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23259 int then_not_else = TRUE;
23260 rtx_insn *this_insn = start_insn;
23261 rtx label = 0;
23263 /* Register the insn jumped to. */
23264 if (reverse)
23266 if (!seeking_return)
23267 label = XEXP (SET_SRC (body), 0);
23269 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23270 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23271 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23273 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23274 then_not_else = FALSE;
23276 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23278 seeking_return = 1;
23279 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23281 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23283 seeking_return = 1;
23284 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23285 then_not_else = FALSE;
23287 else
23288 gcc_unreachable ();
23290 /* See how many insns this branch skips, and what kind of insns. If all
23291 insns are okay, and the label or unconditional branch to the same
23292 label is not too far away, succeed. */
23293 for (insns_skipped = 0;
23294 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23296 rtx scanbody;
23298 this_insn = next_nonnote_insn (this_insn);
23299 if (!this_insn)
23300 break;
23302 switch (GET_CODE (this_insn))
23304 case CODE_LABEL:
23305 /* Succeed if it is the target label, otherwise fail since
23306 control falls in from somewhere else. */
23307 if (this_insn == label)
23309 arm_ccfsm_state = 1;
23310 succeed = TRUE;
23312 else
23313 fail = TRUE;
23314 break;
23316 case BARRIER:
23317 /* Succeed if the following insn is the target label.
23318 Otherwise fail.
23319 If return insns are used then the last insn in a function
23320 will be a barrier. */
23321 this_insn = next_nonnote_insn (this_insn);
23322 if (this_insn && this_insn == label)
23324 arm_ccfsm_state = 1;
23325 succeed = TRUE;
23327 else
23328 fail = TRUE;
23329 break;
23331 case CALL_INSN:
23332 /* The AAPCS says that conditional calls should not be
23333 used since they make interworking inefficient (the
23334 linker can't transform BL<cond> into BLX). That's
23335 only a problem if the machine has BLX. */
23336 if (arm_arch5)
23338 fail = TRUE;
23339 break;
23342 /* Succeed if the following insn is the target label, or
23343 if the following two insns are a barrier and the
23344 target label. */
23345 this_insn = next_nonnote_insn (this_insn);
23346 if (this_insn && BARRIER_P (this_insn))
23347 this_insn = next_nonnote_insn (this_insn);
23349 if (this_insn && this_insn == label
23350 && insns_skipped < max_insns_skipped)
23352 arm_ccfsm_state = 1;
23353 succeed = TRUE;
23355 else
23356 fail = TRUE;
23357 break;
23359 case JUMP_INSN:
23360 /* If this is an unconditional branch to the same label, succeed.
23361 If it is to another label, do nothing. If it is conditional,
23362 fail. */
23363 /* XXX Probably, the tests for SET and the PC are
23364 unnecessary. */
23366 scanbody = PATTERN (this_insn);
23367 if (GET_CODE (scanbody) == SET
23368 && GET_CODE (SET_DEST (scanbody)) == PC)
23370 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23371 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23373 arm_ccfsm_state = 2;
23374 succeed = TRUE;
23376 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23377 fail = TRUE;
23379 /* Fail if a conditional return is undesirable (e.g. on a
23380 StrongARM), but still allow this if optimizing for size. */
23381 else if (GET_CODE (scanbody) == return_code
23382 && !use_return_insn (TRUE, NULL)
23383 && !optimize_size)
23384 fail = TRUE;
23385 else if (GET_CODE (scanbody) == return_code)
23387 arm_ccfsm_state = 2;
23388 succeed = TRUE;
23390 else if (GET_CODE (scanbody) == PARALLEL)
23392 switch (get_attr_conds (this_insn))
23394 case CONDS_NOCOND:
23395 break;
23396 default:
23397 fail = TRUE;
23398 break;
23401 else
23402 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23404 break;
23406 case INSN:
23407 /* Instructions using or affecting the condition codes make it
23408 fail. */
23409 scanbody = PATTERN (this_insn);
23410 if (!(GET_CODE (scanbody) == SET
23411 || GET_CODE (scanbody) == PARALLEL)
23412 || get_attr_conds (this_insn) != CONDS_NOCOND)
23413 fail = TRUE;
23414 break;
23416 default:
23417 break;
23420 if (succeed)
23422 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23423 arm_target_label = CODE_LABEL_NUMBER (label);
23424 else
23426 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23428 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23430 this_insn = next_nonnote_insn (this_insn);
23431 gcc_assert (!this_insn
23432 || (!BARRIER_P (this_insn)
23433 && !LABEL_P (this_insn)));
23435 if (!this_insn)
23437 /* Oh, dear! we ran off the end.. give up. */
23438 extract_constrain_insn_cached (insn);
23439 arm_ccfsm_state = 0;
23440 arm_target_insn = NULL;
23441 return;
23443 arm_target_insn = this_insn;
23446 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23447 what it was. */
23448 if (!reverse)
23449 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23451 if (reverse || then_not_else)
23452 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23455 /* Restore recog_data (getting the attributes of other insns can
23456 destroy this array, but final.c assumes that it remains intact
23457 across this call. */
23458 extract_constrain_insn_cached (insn);
23462 /* Output IT instructions. */
23463 void
23464 thumb2_asm_output_opcode (FILE * stream)
23466 char buff[5];
23467 int n;
23469 if (arm_condexec_mask)
23471 for (n = 0; n < arm_condexec_masklen; n++)
23472 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23473 buff[n] = 0;
23474 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23475 arm_condition_codes[arm_current_cc]);
23476 arm_condexec_mask = 0;
23480 /* Returns true if REGNO is a valid register
23481 for holding a quantity of type MODE. */
23483 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23485 if (GET_MODE_CLASS (mode) == MODE_CC)
23486 return (regno == CC_REGNUM
23487 || (TARGET_HARD_FLOAT
23488 && regno == VFPCC_REGNUM));
23490 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23491 return false;
23493 if (TARGET_THUMB1)
23494 /* For the Thumb we only allow values bigger than SImode in
23495 registers 0 - 6, so that there is always a second low
23496 register available to hold the upper part of the value.
23497 We probably we ought to ensure that the register is the
23498 start of an even numbered register pair. */
23499 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23501 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23503 if (mode == SFmode || mode == SImode)
23504 return VFP_REGNO_OK_FOR_SINGLE (regno);
23506 if (mode == DFmode)
23507 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23509 if (mode == HFmode)
23510 return VFP_REGNO_OK_FOR_SINGLE (regno);
23512 /* VFP registers can hold HImode values. */
23513 if (mode == HImode)
23514 return VFP_REGNO_OK_FOR_SINGLE (regno);
23516 if (TARGET_NEON)
23517 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23518 || (VALID_NEON_QREG_MODE (mode)
23519 && NEON_REGNO_OK_FOR_QUAD (regno))
23520 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23521 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23522 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23523 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23524 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23526 return FALSE;
23529 if (TARGET_REALLY_IWMMXT)
23531 if (IS_IWMMXT_GR_REGNUM (regno))
23532 return mode == SImode;
23534 if (IS_IWMMXT_REGNUM (regno))
23535 return VALID_IWMMXT_REG_MODE (mode);
23538 /* We allow almost any value to be stored in the general registers.
23539 Restrict doubleword quantities to even register pairs in ARM state
23540 so that we can use ldrd. Do not allow very large Neon structure
23541 opaque modes in general registers; they would use too many. */
23542 if (regno <= LAST_ARM_REGNUM)
23544 if (ARM_NUM_REGS (mode) > 4)
23545 return FALSE;
23547 if (TARGET_THUMB2)
23548 return TRUE;
23550 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23553 if (regno == FRAME_POINTER_REGNUM
23554 || regno == ARG_POINTER_REGNUM)
23555 /* We only allow integers in the fake hard registers. */
23556 return GET_MODE_CLASS (mode) == MODE_INT;
23558 return FALSE;
23561 /* Implement MODES_TIEABLE_P. */
23563 bool
23564 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23566 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23567 return true;
23569 /* We specifically want to allow elements of "structure" modes to
23570 be tieable to the structure. This more general condition allows
23571 other rarer situations too. */
23572 if (TARGET_NEON
23573 && (VALID_NEON_DREG_MODE (mode1)
23574 || VALID_NEON_QREG_MODE (mode1)
23575 || VALID_NEON_STRUCT_MODE (mode1))
23576 && (VALID_NEON_DREG_MODE (mode2)
23577 || VALID_NEON_QREG_MODE (mode2)
23578 || VALID_NEON_STRUCT_MODE (mode2)))
23579 return true;
23581 return false;
23584 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23585 not used in arm mode. */
23587 enum reg_class
23588 arm_regno_class (int regno)
23590 if (regno == PC_REGNUM)
23591 return NO_REGS;
23593 if (TARGET_THUMB1)
23595 if (regno == STACK_POINTER_REGNUM)
23596 return STACK_REG;
23597 if (regno == CC_REGNUM)
23598 return CC_REG;
23599 if (regno < 8)
23600 return LO_REGS;
23601 return HI_REGS;
23604 if (TARGET_THUMB2 && regno < 8)
23605 return LO_REGS;
23607 if ( regno <= LAST_ARM_REGNUM
23608 || regno == FRAME_POINTER_REGNUM
23609 || regno == ARG_POINTER_REGNUM)
23610 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23612 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23613 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23615 if (IS_VFP_REGNUM (regno))
23617 if (regno <= D7_VFP_REGNUM)
23618 return VFP_D0_D7_REGS;
23619 else if (regno <= LAST_LO_VFP_REGNUM)
23620 return VFP_LO_REGS;
23621 else
23622 return VFP_HI_REGS;
23625 if (IS_IWMMXT_REGNUM (regno))
23626 return IWMMXT_REGS;
23628 if (IS_IWMMXT_GR_REGNUM (regno))
23629 return IWMMXT_GR_REGS;
23631 return NO_REGS;
23634 /* Handle a special case when computing the offset
23635 of an argument from the frame pointer. */
23637 arm_debugger_arg_offset (int value, rtx addr)
23639 rtx_insn *insn;
23641 /* We are only interested if dbxout_parms() failed to compute the offset. */
23642 if (value != 0)
23643 return 0;
23645 /* We can only cope with the case where the address is held in a register. */
23646 if (!REG_P (addr))
23647 return 0;
23649 /* If we are using the frame pointer to point at the argument, then
23650 an offset of 0 is correct. */
23651 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23652 return 0;
23654 /* If we are using the stack pointer to point at the
23655 argument, then an offset of 0 is correct. */
23656 /* ??? Check this is consistent with thumb2 frame layout. */
23657 if ((TARGET_THUMB || !frame_pointer_needed)
23658 && REGNO (addr) == SP_REGNUM)
23659 return 0;
23661 /* Oh dear. The argument is pointed to by a register rather
23662 than being held in a register, or being stored at a known
23663 offset from the frame pointer. Since GDB only understands
23664 those two kinds of argument we must translate the address
23665 held in the register into an offset from the frame pointer.
23666 We do this by searching through the insns for the function
23667 looking to see where this register gets its value. If the
23668 register is initialized from the frame pointer plus an offset
23669 then we are in luck and we can continue, otherwise we give up.
23671 This code is exercised by producing debugging information
23672 for a function with arguments like this:
23674 double func (double a, double b, int c, double d) {return d;}
23676 Without this code the stab for parameter 'd' will be set to
23677 an offset of 0 from the frame pointer, rather than 8. */
23679 /* The if() statement says:
23681 If the insn is a normal instruction
23682 and if the insn is setting the value in a register
23683 and if the register being set is the register holding the address of the argument
23684 and if the address is computing by an addition
23685 that involves adding to a register
23686 which is the frame pointer
23687 a constant integer
23689 then... */
23691 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23693 if ( NONJUMP_INSN_P (insn)
23694 && GET_CODE (PATTERN (insn)) == SET
23695 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23696 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23697 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23698 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23699 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23702 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23704 break;
23708 if (value == 0)
23710 debug_rtx (addr);
23711 warning (0, "unable to compute real location of stacked parameter");
23712 value = 8; /* XXX magic hack */
23715 return value;
23718 /* Implement TARGET_PROMOTED_TYPE. */
23720 static tree
23721 arm_promoted_type (const_tree t)
23723 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23724 return float_type_node;
23725 return NULL_TREE;
23728 /* Implement TARGET_CONVERT_TO_TYPE.
23729 Specifically, this hook implements the peculiarity of the ARM
23730 half-precision floating-point C semantics that requires conversions between
23731 __fp16 to or from double to do an intermediate conversion to float. */
23733 static tree
23734 arm_convert_to_type (tree type, tree expr)
23736 tree fromtype = TREE_TYPE (expr);
23737 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23738 return NULL_TREE;
23739 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23740 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23741 return convert (type, convert (float_type_node, expr));
23742 return NULL_TREE;
23745 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23746 This simply adds HFmode as a supported mode; even though we don't
23747 implement arithmetic on this type directly, it's supported by
23748 optabs conversions, much the way the double-word arithmetic is
23749 special-cased in the default hook. */
23751 static bool
23752 arm_scalar_mode_supported_p (machine_mode mode)
23754 if (mode == HFmode)
23755 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23756 else if (ALL_FIXED_POINT_MODE_P (mode))
23757 return true;
23758 else
23759 return default_scalar_mode_supported_p (mode);
23762 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23763 not to early-clobber SRC registers in the process.
23765 We assume that the operands described by SRC and DEST represent a
23766 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23767 number of components into which the copy has been decomposed. */
23768 void
23769 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23771 unsigned int i;
23773 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23774 || REGNO (operands[0]) < REGNO (operands[1]))
23776 for (i = 0; i < count; i++)
23778 operands[2 * i] = dest[i];
23779 operands[2 * i + 1] = src[i];
23782 else
23784 for (i = 0; i < count; i++)
23786 operands[2 * i] = dest[count - i - 1];
23787 operands[2 * i + 1] = src[count - i - 1];
23792 /* Split operands into moves from op[1] + op[2] into op[0]. */
23794 void
23795 neon_split_vcombine (rtx operands[3])
23797 unsigned int dest = REGNO (operands[0]);
23798 unsigned int src1 = REGNO (operands[1]);
23799 unsigned int src2 = REGNO (operands[2]);
23800 machine_mode halfmode = GET_MODE (operands[1]);
23801 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23802 rtx destlo, desthi;
23804 if (src1 == dest && src2 == dest + halfregs)
23806 /* No-op move. Can't split to nothing; emit something. */
23807 emit_note (NOTE_INSN_DELETED);
23808 return;
23811 /* Preserve register attributes for variable tracking. */
23812 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23813 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23814 GET_MODE_SIZE (halfmode));
23816 /* Special case of reversed high/low parts. Use VSWP. */
23817 if (src2 == dest && src1 == dest + halfregs)
23819 rtx x = gen_rtx_SET (destlo, operands[1]);
23820 rtx y = gen_rtx_SET (desthi, operands[2]);
23821 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23822 return;
23825 if (!reg_overlap_mentioned_p (operands[2], destlo))
23827 /* Try to avoid unnecessary moves if part of the result
23828 is in the right place already. */
23829 if (src1 != dest)
23830 emit_move_insn (destlo, operands[1]);
23831 if (src2 != dest + halfregs)
23832 emit_move_insn (desthi, operands[2]);
23834 else
23836 if (src2 != dest + halfregs)
23837 emit_move_insn (desthi, operands[2]);
23838 if (src1 != dest)
23839 emit_move_insn (destlo, operands[1]);
23843 /* Return the number (counting from 0) of
23844 the least significant set bit in MASK. */
23846 inline static int
23847 number_of_first_bit_set (unsigned mask)
23849 return ctz_hwi (mask);
23852 /* Like emit_multi_reg_push, but allowing for a different set of
23853 registers to be described as saved. MASK is the set of registers
23854 to be saved; REAL_REGS is the set of registers to be described as
23855 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23857 static rtx_insn *
23858 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23860 unsigned long regno;
23861 rtx par[10], tmp, reg;
23862 rtx_insn *insn;
23863 int i, j;
23865 /* Build the parallel of the registers actually being stored. */
23866 for (i = 0; mask; ++i, mask &= mask - 1)
23868 regno = ctz_hwi (mask);
23869 reg = gen_rtx_REG (SImode, regno);
23871 if (i == 0)
23872 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23873 else
23874 tmp = gen_rtx_USE (VOIDmode, reg);
23876 par[i] = tmp;
23879 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23880 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23881 tmp = gen_frame_mem (BLKmode, tmp);
23882 tmp = gen_rtx_SET (tmp, par[0]);
23883 par[0] = tmp;
23885 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23886 insn = emit_insn (tmp);
23888 /* Always build the stack adjustment note for unwind info. */
23889 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23890 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23891 par[0] = tmp;
23893 /* Build the parallel of the registers recorded as saved for unwind. */
23894 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23896 regno = ctz_hwi (real_regs);
23897 reg = gen_rtx_REG (SImode, regno);
23899 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23900 tmp = gen_frame_mem (SImode, tmp);
23901 tmp = gen_rtx_SET (tmp, reg);
23902 RTX_FRAME_RELATED_P (tmp) = 1;
23903 par[j + 1] = tmp;
23906 if (j == 0)
23907 tmp = par[0];
23908 else
23910 RTX_FRAME_RELATED_P (par[0]) = 1;
23911 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23914 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23916 return insn;
23919 /* Emit code to push or pop registers to or from the stack. F is the
23920 assembly file. MASK is the registers to pop. */
23921 static void
23922 thumb_pop (FILE *f, unsigned long mask)
23924 int regno;
23925 int lo_mask = mask & 0xFF;
23926 int pushed_words = 0;
23928 gcc_assert (mask);
23930 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23932 /* Special case. Do not generate a POP PC statement here, do it in
23933 thumb_exit() */
23934 thumb_exit (f, -1);
23935 return;
23938 fprintf (f, "\tpop\t{");
23940 /* Look at the low registers first. */
23941 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23943 if (lo_mask & 1)
23945 asm_fprintf (f, "%r", regno);
23947 if ((lo_mask & ~1) != 0)
23948 fprintf (f, ", ");
23950 pushed_words++;
23954 if (mask & (1 << PC_REGNUM))
23956 /* Catch popping the PC. */
23957 if (TARGET_INTERWORK || TARGET_BACKTRACE
23958 || crtl->calls_eh_return)
23960 /* The PC is never poped directly, instead
23961 it is popped into r3 and then BX is used. */
23962 fprintf (f, "}\n");
23964 thumb_exit (f, -1);
23966 return;
23968 else
23970 if (mask & 0xFF)
23971 fprintf (f, ", ");
23973 asm_fprintf (f, "%r", PC_REGNUM);
23977 fprintf (f, "}\n");
23980 /* Generate code to return from a thumb function.
23981 If 'reg_containing_return_addr' is -1, then the return address is
23982 actually on the stack, at the stack pointer. */
23983 static void
23984 thumb_exit (FILE *f, int reg_containing_return_addr)
23986 unsigned regs_available_for_popping;
23987 unsigned regs_to_pop;
23988 int pops_needed;
23989 unsigned available;
23990 unsigned required;
23991 machine_mode mode;
23992 int size;
23993 int restore_a4 = FALSE;
23995 /* Compute the registers we need to pop. */
23996 regs_to_pop = 0;
23997 pops_needed = 0;
23999 if (reg_containing_return_addr == -1)
24001 regs_to_pop |= 1 << LR_REGNUM;
24002 ++pops_needed;
24005 if (TARGET_BACKTRACE)
24007 /* Restore the (ARM) frame pointer and stack pointer. */
24008 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24009 pops_needed += 2;
24012 /* If there is nothing to pop then just emit the BX instruction and
24013 return. */
24014 if (pops_needed == 0)
24016 if (crtl->calls_eh_return)
24017 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24019 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24020 return;
24022 /* Otherwise if we are not supporting interworking and we have not created
24023 a backtrace structure and the function was not entered in ARM mode then
24024 just pop the return address straight into the PC. */
24025 else if (!TARGET_INTERWORK
24026 && !TARGET_BACKTRACE
24027 && !is_called_in_ARM_mode (current_function_decl)
24028 && !crtl->calls_eh_return)
24030 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24031 return;
24034 /* Find out how many of the (return) argument registers we can corrupt. */
24035 regs_available_for_popping = 0;
24037 /* If returning via __builtin_eh_return, the bottom three registers
24038 all contain information needed for the return. */
24039 if (crtl->calls_eh_return)
24040 size = 12;
24041 else
24043 /* If we can deduce the registers used from the function's
24044 return value. This is more reliable that examining
24045 df_regs_ever_live_p () because that will be set if the register is
24046 ever used in the function, not just if the register is used
24047 to hold a return value. */
24049 if (crtl->return_rtx != 0)
24050 mode = GET_MODE (crtl->return_rtx);
24051 else
24052 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24054 size = GET_MODE_SIZE (mode);
24056 if (size == 0)
24058 /* In a void function we can use any argument register.
24059 In a function that returns a structure on the stack
24060 we can use the second and third argument registers. */
24061 if (mode == VOIDmode)
24062 regs_available_for_popping =
24063 (1 << ARG_REGISTER (1))
24064 | (1 << ARG_REGISTER (2))
24065 | (1 << ARG_REGISTER (3));
24066 else
24067 regs_available_for_popping =
24068 (1 << ARG_REGISTER (2))
24069 | (1 << ARG_REGISTER (3));
24071 else if (size <= 4)
24072 regs_available_for_popping =
24073 (1 << ARG_REGISTER (2))
24074 | (1 << ARG_REGISTER (3));
24075 else if (size <= 8)
24076 regs_available_for_popping =
24077 (1 << ARG_REGISTER (3));
24080 /* Match registers to be popped with registers into which we pop them. */
24081 for (available = regs_available_for_popping,
24082 required = regs_to_pop;
24083 required != 0 && available != 0;
24084 available &= ~(available & - available),
24085 required &= ~(required & - required))
24086 -- pops_needed;
24088 /* If we have any popping registers left over, remove them. */
24089 if (available > 0)
24090 regs_available_for_popping &= ~available;
24092 /* Otherwise if we need another popping register we can use
24093 the fourth argument register. */
24094 else if (pops_needed)
24096 /* If we have not found any free argument registers and
24097 reg a4 contains the return address, we must move it. */
24098 if (regs_available_for_popping == 0
24099 && reg_containing_return_addr == LAST_ARG_REGNUM)
24101 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24102 reg_containing_return_addr = LR_REGNUM;
24104 else if (size > 12)
24106 /* Register a4 is being used to hold part of the return value,
24107 but we have dire need of a free, low register. */
24108 restore_a4 = TRUE;
24110 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24113 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24115 /* The fourth argument register is available. */
24116 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24118 --pops_needed;
24122 /* Pop as many registers as we can. */
24123 thumb_pop (f, regs_available_for_popping);
24125 /* Process the registers we popped. */
24126 if (reg_containing_return_addr == -1)
24128 /* The return address was popped into the lowest numbered register. */
24129 regs_to_pop &= ~(1 << LR_REGNUM);
24131 reg_containing_return_addr =
24132 number_of_first_bit_set (regs_available_for_popping);
24134 /* Remove this register for the mask of available registers, so that
24135 the return address will not be corrupted by further pops. */
24136 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24139 /* If we popped other registers then handle them here. */
24140 if (regs_available_for_popping)
24142 int frame_pointer;
24144 /* Work out which register currently contains the frame pointer. */
24145 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24147 /* Move it into the correct place. */
24148 asm_fprintf (f, "\tmov\t%r, %r\n",
24149 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24151 /* (Temporarily) remove it from the mask of popped registers. */
24152 regs_available_for_popping &= ~(1 << frame_pointer);
24153 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24155 if (regs_available_for_popping)
24157 int stack_pointer;
24159 /* We popped the stack pointer as well,
24160 find the register that contains it. */
24161 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24163 /* Move it into the stack register. */
24164 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24166 /* At this point we have popped all necessary registers, so
24167 do not worry about restoring regs_available_for_popping
24168 to its correct value:
24170 assert (pops_needed == 0)
24171 assert (regs_available_for_popping == (1 << frame_pointer))
24172 assert (regs_to_pop == (1 << STACK_POINTER)) */
24174 else
24176 /* Since we have just move the popped value into the frame
24177 pointer, the popping register is available for reuse, and
24178 we know that we still have the stack pointer left to pop. */
24179 regs_available_for_popping |= (1 << frame_pointer);
24183 /* If we still have registers left on the stack, but we no longer have
24184 any registers into which we can pop them, then we must move the return
24185 address into the link register and make available the register that
24186 contained it. */
24187 if (regs_available_for_popping == 0 && pops_needed > 0)
24189 regs_available_for_popping |= 1 << reg_containing_return_addr;
24191 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24192 reg_containing_return_addr);
24194 reg_containing_return_addr = LR_REGNUM;
24197 /* If we have registers left on the stack then pop some more.
24198 We know that at most we will want to pop FP and SP. */
24199 if (pops_needed > 0)
24201 int popped_into;
24202 int move_to;
24204 thumb_pop (f, regs_available_for_popping);
24206 /* We have popped either FP or SP.
24207 Move whichever one it is into the correct register. */
24208 popped_into = number_of_first_bit_set (regs_available_for_popping);
24209 move_to = number_of_first_bit_set (regs_to_pop);
24211 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24213 regs_to_pop &= ~(1 << move_to);
24215 --pops_needed;
24218 /* If we still have not popped everything then we must have only
24219 had one register available to us and we are now popping the SP. */
24220 if (pops_needed > 0)
24222 int popped_into;
24224 thumb_pop (f, regs_available_for_popping);
24226 popped_into = number_of_first_bit_set (regs_available_for_popping);
24228 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24230 assert (regs_to_pop == (1 << STACK_POINTER))
24231 assert (pops_needed == 1)
24235 /* If necessary restore the a4 register. */
24236 if (restore_a4)
24238 if (reg_containing_return_addr != LR_REGNUM)
24240 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24241 reg_containing_return_addr = LR_REGNUM;
24244 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24247 if (crtl->calls_eh_return)
24248 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24250 /* Return to caller. */
24251 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24254 /* Scan INSN just before assembler is output for it.
24255 For Thumb-1, we track the status of the condition codes; this
24256 information is used in the cbranchsi4_insn pattern. */
24257 void
24258 thumb1_final_prescan_insn (rtx_insn *insn)
24260 if (flag_print_asm_name)
24261 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24262 INSN_ADDRESSES (INSN_UID (insn)));
24263 /* Don't overwrite the previous setter when we get to a cbranch. */
24264 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24266 enum attr_conds conds;
24268 if (cfun->machine->thumb1_cc_insn)
24270 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24271 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24272 CC_STATUS_INIT;
24274 conds = get_attr_conds (insn);
24275 if (conds == CONDS_SET)
24277 rtx set = single_set (insn);
24278 cfun->machine->thumb1_cc_insn = insn;
24279 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24280 cfun->machine->thumb1_cc_op1 = const0_rtx;
24281 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24282 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24284 rtx src1 = XEXP (SET_SRC (set), 1);
24285 if (src1 == const0_rtx)
24286 cfun->machine->thumb1_cc_mode = CCmode;
24288 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24290 /* Record the src register operand instead of dest because
24291 cprop_hardreg pass propagates src. */
24292 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24295 else if (conds != CONDS_NOCOND)
24296 cfun->machine->thumb1_cc_insn = NULL_RTX;
24299 /* Check if unexpected far jump is used. */
24300 if (cfun->machine->lr_save_eliminated
24301 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24302 internal_error("Unexpected thumb1 far jump");
24306 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24308 unsigned HOST_WIDE_INT mask = 0xff;
24309 int i;
24311 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24312 if (val == 0) /* XXX */
24313 return 0;
24315 for (i = 0; i < 25; i++)
24316 if ((val & (mask << i)) == val)
24317 return 1;
24319 return 0;
24322 /* Returns nonzero if the current function contains,
24323 or might contain a far jump. */
24324 static int
24325 thumb_far_jump_used_p (void)
24327 rtx_insn *insn;
24328 bool far_jump = false;
24329 unsigned int func_size = 0;
24331 /* This test is only important for leaf functions. */
24332 /* assert (!leaf_function_p ()); */
24334 /* If we have already decided that far jumps may be used,
24335 do not bother checking again, and always return true even if
24336 it turns out that they are not being used. Once we have made
24337 the decision that far jumps are present (and that hence the link
24338 register will be pushed onto the stack) we cannot go back on it. */
24339 if (cfun->machine->far_jump_used)
24340 return 1;
24342 /* If this function is not being called from the prologue/epilogue
24343 generation code then it must be being called from the
24344 INITIAL_ELIMINATION_OFFSET macro. */
24345 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24347 /* In this case we know that we are being asked about the elimination
24348 of the arg pointer register. If that register is not being used,
24349 then there are no arguments on the stack, and we do not have to
24350 worry that a far jump might force the prologue to push the link
24351 register, changing the stack offsets. In this case we can just
24352 return false, since the presence of far jumps in the function will
24353 not affect stack offsets.
24355 If the arg pointer is live (or if it was live, but has now been
24356 eliminated and so set to dead) then we do have to test to see if
24357 the function might contain a far jump. This test can lead to some
24358 false negatives, since before reload is completed, then length of
24359 branch instructions is not known, so gcc defaults to returning their
24360 longest length, which in turn sets the far jump attribute to true.
24362 A false negative will not result in bad code being generated, but it
24363 will result in a needless push and pop of the link register. We
24364 hope that this does not occur too often.
24366 If we need doubleword stack alignment this could affect the other
24367 elimination offsets so we can't risk getting it wrong. */
24368 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24369 cfun->machine->arg_pointer_live = 1;
24370 else if (!cfun->machine->arg_pointer_live)
24371 return 0;
24374 /* We should not change far_jump_used during or after reload, as there is
24375 no chance to change stack frame layout. */
24376 if (reload_in_progress || reload_completed)
24377 return 0;
24379 /* Check to see if the function contains a branch
24380 insn with the far jump attribute set. */
24381 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24383 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24385 far_jump = true;
24387 func_size += get_attr_length (insn);
24390 /* Attribute far_jump will always be true for thumb1 before
24391 shorten_branch pass. So checking far_jump attribute before
24392 shorten_branch isn't much useful.
24394 Following heuristic tries to estimate more accurately if a far jump
24395 may finally be used. The heuristic is very conservative as there is
24396 no chance to roll-back the decision of not to use far jump.
24398 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24399 2-byte insn is associated with a 4 byte constant pool. Using
24400 function size 2048/3 as the threshold is conservative enough. */
24401 if (far_jump)
24403 if ((func_size * 3) >= 2048)
24405 /* Record the fact that we have decided that
24406 the function does use far jumps. */
24407 cfun->machine->far_jump_used = 1;
24408 return 1;
24412 return 0;
24415 /* Return nonzero if FUNC must be entered in ARM mode. */
24416 static bool
24417 is_called_in_ARM_mode (tree func)
24419 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24421 /* Ignore the problem about functions whose address is taken. */
24422 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24423 return true;
24425 #ifdef ARM_PE
24426 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24427 #else
24428 return false;
24429 #endif
24432 /* Given the stack offsets and register mask in OFFSETS, decide how
24433 many additional registers to push instead of subtracting a constant
24434 from SP. For epilogues the principle is the same except we use pop.
24435 FOR_PROLOGUE indicates which we're generating. */
24436 static int
24437 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24439 HOST_WIDE_INT amount;
24440 unsigned long live_regs_mask = offsets->saved_regs_mask;
24441 /* Extract a mask of the ones we can give to the Thumb's push/pop
24442 instruction. */
24443 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24444 /* Then count how many other high registers will need to be pushed. */
24445 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24446 int n_free, reg_base, size;
24448 if (!for_prologue && frame_pointer_needed)
24449 amount = offsets->locals_base - offsets->saved_regs;
24450 else
24451 amount = offsets->outgoing_args - offsets->saved_regs;
24453 /* If the stack frame size is 512 exactly, we can save one load
24454 instruction, which should make this a win even when optimizing
24455 for speed. */
24456 if (!optimize_size && amount != 512)
24457 return 0;
24459 /* Can't do this if there are high registers to push. */
24460 if (high_regs_pushed != 0)
24461 return 0;
24463 /* Shouldn't do it in the prologue if no registers would normally
24464 be pushed at all. In the epilogue, also allow it if we'll have
24465 a pop insn for the PC. */
24466 if (l_mask == 0
24467 && (for_prologue
24468 || TARGET_BACKTRACE
24469 || (live_regs_mask & 1 << LR_REGNUM) == 0
24470 || TARGET_INTERWORK
24471 || crtl->args.pretend_args_size != 0))
24472 return 0;
24474 /* Don't do this if thumb_expand_prologue wants to emit instructions
24475 between the push and the stack frame allocation. */
24476 if (for_prologue
24477 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24478 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24479 return 0;
24481 reg_base = 0;
24482 n_free = 0;
24483 if (!for_prologue)
24485 size = arm_size_return_regs ();
24486 reg_base = ARM_NUM_INTS (size);
24487 live_regs_mask >>= reg_base;
24490 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24491 && (for_prologue || call_used_regs[reg_base + n_free]))
24493 live_regs_mask >>= 1;
24494 n_free++;
24497 if (n_free == 0)
24498 return 0;
24499 gcc_assert (amount / 4 * 4 == amount);
24501 if (amount >= 512 && (amount - n_free * 4) < 512)
24502 return (amount - 508) / 4;
24503 if (amount <= n_free * 4)
24504 return amount / 4;
24505 return 0;
24508 /* The bits which aren't usefully expanded as rtl. */
24509 const char *
24510 thumb1_unexpanded_epilogue (void)
24512 arm_stack_offsets *offsets;
24513 int regno;
24514 unsigned long live_regs_mask = 0;
24515 int high_regs_pushed = 0;
24516 int extra_pop;
24517 int had_to_push_lr;
24518 int size;
24520 if (cfun->machine->return_used_this_function != 0)
24521 return "";
24523 if (IS_NAKED (arm_current_func_type ()))
24524 return "";
24526 offsets = arm_get_frame_offsets ();
24527 live_regs_mask = offsets->saved_regs_mask;
24528 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24530 /* If we can deduce the registers used from the function's return value.
24531 This is more reliable that examining df_regs_ever_live_p () because that
24532 will be set if the register is ever used in the function, not just if
24533 the register is used to hold a return value. */
24534 size = arm_size_return_regs ();
24536 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24537 if (extra_pop > 0)
24539 unsigned long extra_mask = (1 << extra_pop) - 1;
24540 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24543 /* The prolog may have pushed some high registers to use as
24544 work registers. e.g. the testsuite file:
24545 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24546 compiles to produce:
24547 push {r4, r5, r6, r7, lr}
24548 mov r7, r9
24549 mov r6, r8
24550 push {r6, r7}
24551 as part of the prolog. We have to undo that pushing here. */
24553 if (high_regs_pushed)
24555 unsigned long mask = live_regs_mask & 0xff;
24556 int next_hi_reg;
24558 /* The available low registers depend on the size of the value we are
24559 returning. */
24560 if (size <= 12)
24561 mask |= 1 << 3;
24562 if (size <= 8)
24563 mask |= 1 << 2;
24565 if (mask == 0)
24566 /* Oh dear! We have no low registers into which we can pop
24567 high registers! */
24568 internal_error
24569 ("no low registers available for popping high registers");
24571 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24572 if (live_regs_mask & (1 << next_hi_reg))
24573 break;
24575 while (high_regs_pushed)
24577 /* Find lo register(s) into which the high register(s) can
24578 be popped. */
24579 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24581 if (mask & (1 << regno))
24582 high_regs_pushed--;
24583 if (high_regs_pushed == 0)
24584 break;
24587 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24589 /* Pop the values into the low register(s). */
24590 thumb_pop (asm_out_file, mask);
24592 /* Move the value(s) into the high registers. */
24593 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24595 if (mask & (1 << regno))
24597 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24598 regno);
24600 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24601 if (live_regs_mask & (1 << next_hi_reg))
24602 break;
24606 live_regs_mask &= ~0x0f00;
24609 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24610 live_regs_mask &= 0xff;
24612 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24614 /* Pop the return address into the PC. */
24615 if (had_to_push_lr)
24616 live_regs_mask |= 1 << PC_REGNUM;
24618 /* Either no argument registers were pushed or a backtrace
24619 structure was created which includes an adjusted stack
24620 pointer, so just pop everything. */
24621 if (live_regs_mask)
24622 thumb_pop (asm_out_file, live_regs_mask);
24624 /* We have either just popped the return address into the
24625 PC or it is was kept in LR for the entire function.
24626 Note that thumb_pop has already called thumb_exit if the
24627 PC was in the list. */
24628 if (!had_to_push_lr)
24629 thumb_exit (asm_out_file, LR_REGNUM);
24631 else
24633 /* Pop everything but the return address. */
24634 if (live_regs_mask)
24635 thumb_pop (asm_out_file, live_regs_mask);
24637 if (had_to_push_lr)
24639 if (size > 12)
24641 /* We have no free low regs, so save one. */
24642 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24643 LAST_ARG_REGNUM);
24646 /* Get the return address into a temporary register. */
24647 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24649 if (size > 12)
24651 /* Move the return address to lr. */
24652 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24653 LAST_ARG_REGNUM);
24654 /* Restore the low register. */
24655 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24656 IP_REGNUM);
24657 regno = LR_REGNUM;
24659 else
24660 regno = LAST_ARG_REGNUM;
24662 else
24663 regno = LR_REGNUM;
24665 /* Remove the argument registers that were pushed onto the stack. */
24666 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24667 SP_REGNUM, SP_REGNUM,
24668 crtl->args.pretend_args_size);
24670 thumb_exit (asm_out_file, regno);
24673 return "";
24676 /* Functions to save and restore machine-specific function data. */
24677 static struct machine_function *
24678 arm_init_machine_status (void)
24680 struct machine_function *machine;
24681 machine = ggc_cleared_alloc<machine_function> ();
24683 #if ARM_FT_UNKNOWN != 0
24684 machine->func_type = ARM_FT_UNKNOWN;
24685 #endif
24686 return machine;
24689 /* Return an RTX indicating where the return address to the
24690 calling function can be found. */
24692 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24694 if (count != 0)
24695 return NULL_RTX;
24697 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24700 /* Do anything needed before RTL is emitted for each function. */
24701 void
24702 arm_init_expanders (void)
24704 /* Arrange to initialize and mark the machine per-function status. */
24705 init_machine_status = arm_init_machine_status;
24707 /* This is to stop the combine pass optimizing away the alignment
24708 adjustment of va_arg. */
24709 /* ??? It is claimed that this should not be necessary. */
24710 if (cfun)
24711 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24714 /* Check that FUNC is called with a different mode. */
24716 bool
24717 arm_change_mode_p (tree func)
24719 if (TREE_CODE (func) != FUNCTION_DECL)
24720 return false;
24722 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24724 if (!callee_tree)
24725 callee_tree = target_option_default_node;
24727 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24728 int flags = callee_opts->x_target_flags;
24730 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24733 /* Like arm_compute_initial_elimination offset. Simpler because there
24734 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24735 to point at the base of the local variables after static stack
24736 space for a function has been allocated. */
24738 HOST_WIDE_INT
24739 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24741 arm_stack_offsets *offsets;
24743 offsets = arm_get_frame_offsets ();
24745 switch (from)
24747 case ARG_POINTER_REGNUM:
24748 switch (to)
24750 case STACK_POINTER_REGNUM:
24751 return offsets->outgoing_args - offsets->saved_args;
24753 case FRAME_POINTER_REGNUM:
24754 return offsets->soft_frame - offsets->saved_args;
24756 case ARM_HARD_FRAME_POINTER_REGNUM:
24757 return offsets->saved_regs - offsets->saved_args;
24759 case THUMB_HARD_FRAME_POINTER_REGNUM:
24760 return offsets->locals_base - offsets->saved_args;
24762 default:
24763 gcc_unreachable ();
24765 break;
24767 case FRAME_POINTER_REGNUM:
24768 switch (to)
24770 case STACK_POINTER_REGNUM:
24771 return offsets->outgoing_args - offsets->soft_frame;
24773 case ARM_HARD_FRAME_POINTER_REGNUM:
24774 return offsets->saved_regs - offsets->soft_frame;
24776 case THUMB_HARD_FRAME_POINTER_REGNUM:
24777 return offsets->locals_base - offsets->soft_frame;
24779 default:
24780 gcc_unreachable ();
24782 break;
24784 default:
24785 gcc_unreachable ();
24789 /* Generate the function's prologue. */
24791 void
24792 thumb1_expand_prologue (void)
24794 rtx_insn *insn;
24796 HOST_WIDE_INT amount;
24797 HOST_WIDE_INT size;
24798 arm_stack_offsets *offsets;
24799 unsigned long func_type;
24800 int regno;
24801 unsigned long live_regs_mask;
24802 unsigned long l_mask;
24803 unsigned high_regs_pushed = 0;
24805 func_type = arm_current_func_type ();
24807 /* Naked functions don't have prologues. */
24808 if (IS_NAKED (func_type))
24810 if (flag_stack_usage_info)
24811 current_function_static_stack_size = 0;
24812 return;
24815 if (IS_INTERRUPT (func_type))
24817 error ("interrupt Service Routines cannot be coded in Thumb mode");
24818 return;
24821 if (is_called_in_ARM_mode (current_function_decl))
24822 emit_insn (gen_prologue_thumb1_interwork ());
24824 offsets = arm_get_frame_offsets ();
24825 live_regs_mask = offsets->saved_regs_mask;
24827 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24828 l_mask = live_regs_mask & 0x40ff;
24829 /* Then count how many other high registers will need to be pushed. */
24830 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24832 if (crtl->args.pretend_args_size)
24834 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24836 if (cfun->machine->uses_anonymous_args)
24838 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24839 unsigned long mask;
24841 mask = 1ul << (LAST_ARG_REGNUM + 1);
24842 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24844 insn = thumb1_emit_multi_reg_push (mask, 0);
24846 else
24848 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24849 stack_pointer_rtx, x));
24851 RTX_FRAME_RELATED_P (insn) = 1;
24854 if (TARGET_BACKTRACE)
24856 HOST_WIDE_INT offset = 0;
24857 unsigned work_register;
24858 rtx work_reg, x, arm_hfp_rtx;
24860 /* We have been asked to create a stack backtrace structure.
24861 The code looks like this:
24863 0 .align 2
24864 0 func:
24865 0 sub SP, #16 Reserve space for 4 registers.
24866 2 push {R7} Push low registers.
24867 4 add R7, SP, #20 Get the stack pointer before the push.
24868 6 str R7, [SP, #8] Store the stack pointer
24869 (before reserving the space).
24870 8 mov R7, PC Get hold of the start of this code + 12.
24871 10 str R7, [SP, #16] Store it.
24872 12 mov R7, FP Get hold of the current frame pointer.
24873 14 str R7, [SP, #4] Store it.
24874 16 mov R7, LR Get hold of the current return address.
24875 18 str R7, [SP, #12] Store it.
24876 20 add R7, SP, #16 Point at the start of the
24877 backtrace structure.
24878 22 mov FP, R7 Put this value into the frame pointer. */
24880 work_register = thumb_find_work_register (live_regs_mask);
24881 work_reg = gen_rtx_REG (SImode, work_register);
24882 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24884 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24885 stack_pointer_rtx, GEN_INT (-16)));
24886 RTX_FRAME_RELATED_P (insn) = 1;
24888 if (l_mask)
24890 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24891 RTX_FRAME_RELATED_P (insn) = 1;
24893 offset = bit_count (l_mask) * UNITS_PER_WORD;
24896 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24897 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24899 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24900 x = gen_frame_mem (SImode, x);
24901 emit_move_insn (x, work_reg);
24903 /* Make sure that the instruction fetching the PC is in the right place
24904 to calculate "start of backtrace creation code + 12". */
24905 /* ??? The stores using the common WORK_REG ought to be enough to
24906 prevent the scheduler from doing anything weird. Failing that
24907 we could always move all of the following into an UNSPEC_VOLATILE. */
24908 if (l_mask)
24910 x = gen_rtx_REG (SImode, PC_REGNUM);
24911 emit_move_insn (work_reg, x);
24913 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24914 x = gen_frame_mem (SImode, x);
24915 emit_move_insn (x, work_reg);
24917 emit_move_insn (work_reg, arm_hfp_rtx);
24919 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24920 x = gen_frame_mem (SImode, x);
24921 emit_move_insn (x, work_reg);
24923 else
24925 emit_move_insn (work_reg, arm_hfp_rtx);
24927 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24928 x = gen_frame_mem (SImode, x);
24929 emit_move_insn (x, work_reg);
24931 x = gen_rtx_REG (SImode, PC_REGNUM);
24932 emit_move_insn (work_reg, x);
24934 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24935 x = gen_frame_mem (SImode, x);
24936 emit_move_insn (x, work_reg);
24939 x = gen_rtx_REG (SImode, LR_REGNUM);
24940 emit_move_insn (work_reg, x);
24942 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24943 x = gen_frame_mem (SImode, x);
24944 emit_move_insn (x, work_reg);
24946 x = GEN_INT (offset + 12);
24947 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24949 emit_move_insn (arm_hfp_rtx, work_reg);
24951 /* Optimization: If we are not pushing any low registers but we are going
24952 to push some high registers then delay our first push. This will just
24953 be a push of LR and we can combine it with the push of the first high
24954 register. */
24955 else if ((l_mask & 0xff) != 0
24956 || (high_regs_pushed == 0 && l_mask))
24958 unsigned long mask = l_mask;
24959 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24960 insn = thumb1_emit_multi_reg_push (mask, mask);
24961 RTX_FRAME_RELATED_P (insn) = 1;
24964 if (high_regs_pushed)
24966 unsigned pushable_regs;
24967 unsigned next_hi_reg;
24968 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24969 : crtl->args.info.nregs;
24970 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24972 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24973 if (live_regs_mask & (1 << next_hi_reg))
24974 break;
24976 /* Here we need to mask out registers used for passing arguments
24977 even if they can be pushed. This is to avoid using them to stash the high
24978 registers. Such kind of stash may clobber the use of arguments. */
24979 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24981 if (pushable_regs == 0)
24982 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24984 while (high_regs_pushed > 0)
24986 unsigned long real_regs_mask = 0;
24988 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24990 if (pushable_regs & (1 << regno))
24992 emit_move_insn (gen_rtx_REG (SImode, regno),
24993 gen_rtx_REG (SImode, next_hi_reg));
24995 high_regs_pushed --;
24996 real_regs_mask |= (1 << next_hi_reg);
24998 if (high_regs_pushed)
25000 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25001 next_hi_reg --)
25002 if (live_regs_mask & (1 << next_hi_reg))
25003 break;
25005 else
25007 pushable_regs &= ~((1 << regno) - 1);
25008 break;
25013 /* If we had to find a work register and we have not yet
25014 saved the LR then add it to the list of regs to push. */
25015 if (l_mask == (1 << LR_REGNUM))
25017 pushable_regs |= l_mask;
25018 real_regs_mask |= l_mask;
25019 l_mask = 0;
25022 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
25023 RTX_FRAME_RELATED_P (insn) = 1;
25027 /* Load the pic register before setting the frame pointer,
25028 so we can use r7 as a temporary work register. */
25029 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25030 arm_load_pic_register (live_regs_mask);
25032 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25033 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25034 stack_pointer_rtx);
25036 size = offsets->outgoing_args - offsets->saved_args;
25037 if (flag_stack_usage_info)
25038 current_function_static_stack_size = size;
25040 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25041 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
25042 sorry ("-fstack-check=specific for Thumb-1");
25044 amount = offsets->outgoing_args - offsets->saved_regs;
25045 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25046 if (amount)
25048 if (amount < 512)
25050 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25051 GEN_INT (- amount)));
25052 RTX_FRAME_RELATED_P (insn) = 1;
25054 else
25056 rtx reg, dwarf;
25058 /* The stack decrement is too big for an immediate value in a single
25059 insn. In theory we could issue multiple subtracts, but after
25060 three of them it becomes more space efficient to place the full
25061 value in the constant pool and load into a register. (Also the
25062 ARM debugger really likes to see only one stack decrement per
25063 function). So instead we look for a scratch register into which
25064 we can load the decrement, and then we subtract this from the
25065 stack pointer. Unfortunately on the thumb the only available
25066 scratch registers are the argument registers, and we cannot use
25067 these as they may hold arguments to the function. Instead we
25068 attempt to locate a call preserved register which is used by this
25069 function. If we can find one, then we know that it will have
25070 been pushed at the start of the prologue and so we can corrupt
25071 it now. */
25072 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25073 if (live_regs_mask & (1 << regno))
25074 break;
25076 gcc_assert(regno <= LAST_LO_REGNUM);
25078 reg = gen_rtx_REG (SImode, regno);
25080 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25082 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25083 stack_pointer_rtx, reg));
25085 dwarf = gen_rtx_SET (stack_pointer_rtx,
25086 plus_constant (Pmode, stack_pointer_rtx,
25087 -amount));
25088 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25089 RTX_FRAME_RELATED_P (insn) = 1;
25093 if (frame_pointer_needed)
25094 thumb_set_frame_pointer (offsets);
25096 /* If we are profiling, make sure no instructions are scheduled before
25097 the call to mcount. Similarly if the user has requested no
25098 scheduling in the prolog. Similarly if we want non-call exceptions
25099 using the EABI unwinder, to prevent faulting instructions from being
25100 swapped with a stack adjustment. */
25101 if (crtl->profile || !TARGET_SCHED_PROLOG
25102 || (arm_except_unwind_info (&global_options) == UI_TARGET
25103 && cfun->can_throw_non_call_exceptions))
25104 emit_insn (gen_blockage ());
25106 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25107 if (live_regs_mask & 0xff)
25108 cfun->machine->lr_save_eliminated = 0;
25111 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25112 POP instruction can be generated. LR should be replaced by PC. All
25113 the checks required are already done by USE_RETURN_INSN (). Hence,
25114 all we really need to check here is if single register is to be
25115 returned, or multiple register return. */
25116 void
25117 thumb2_expand_return (bool simple_return)
25119 int i, num_regs;
25120 unsigned long saved_regs_mask;
25121 arm_stack_offsets *offsets;
25123 offsets = arm_get_frame_offsets ();
25124 saved_regs_mask = offsets->saved_regs_mask;
25126 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25127 if (saved_regs_mask & (1 << i))
25128 num_regs++;
25130 if (!simple_return && saved_regs_mask)
25132 if (num_regs == 1)
25134 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25135 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25136 rtx addr = gen_rtx_MEM (SImode,
25137 gen_rtx_POST_INC (SImode,
25138 stack_pointer_rtx));
25139 set_mem_alias_set (addr, get_frame_alias_set ());
25140 XVECEXP (par, 0, 0) = ret_rtx;
25141 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25142 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25143 emit_jump_insn (par);
25145 else
25147 saved_regs_mask &= ~ (1 << LR_REGNUM);
25148 saved_regs_mask |= (1 << PC_REGNUM);
25149 arm_emit_multi_reg_pop (saved_regs_mask);
25152 else
25154 emit_jump_insn (simple_return_rtx);
25158 void
25159 thumb1_expand_epilogue (void)
25161 HOST_WIDE_INT amount;
25162 arm_stack_offsets *offsets;
25163 int regno;
25165 /* Naked functions don't have prologues. */
25166 if (IS_NAKED (arm_current_func_type ()))
25167 return;
25169 offsets = arm_get_frame_offsets ();
25170 amount = offsets->outgoing_args - offsets->saved_regs;
25172 if (frame_pointer_needed)
25174 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25175 amount = offsets->locals_base - offsets->saved_regs;
25177 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25179 gcc_assert (amount >= 0);
25180 if (amount)
25182 emit_insn (gen_blockage ());
25184 if (amount < 512)
25185 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25186 GEN_INT (amount)));
25187 else
25189 /* r3 is always free in the epilogue. */
25190 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25192 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25193 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25197 /* Emit a USE (stack_pointer_rtx), so that
25198 the stack adjustment will not be deleted. */
25199 emit_insn (gen_force_register_use (stack_pointer_rtx));
25201 if (crtl->profile || !TARGET_SCHED_PROLOG)
25202 emit_insn (gen_blockage ());
25204 /* Emit a clobber for each insn that will be restored in the epilogue,
25205 so that flow2 will get register lifetimes correct. */
25206 for (regno = 0; regno < 13; regno++)
25207 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25208 emit_clobber (gen_rtx_REG (SImode, regno));
25210 if (! df_regs_ever_live_p (LR_REGNUM))
25211 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25214 /* Epilogue code for APCS frame. */
25215 static void
25216 arm_expand_epilogue_apcs_frame (bool really_return)
25218 unsigned long func_type;
25219 unsigned long saved_regs_mask;
25220 int num_regs = 0;
25221 int i;
25222 int floats_from_frame = 0;
25223 arm_stack_offsets *offsets;
25225 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25226 func_type = arm_current_func_type ();
25228 /* Get frame offsets for ARM. */
25229 offsets = arm_get_frame_offsets ();
25230 saved_regs_mask = offsets->saved_regs_mask;
25232 /* Find the offset of the floating-point save area in the frame. */
25233 floats_from_frame
25234 = (offsets->saved_args
25235 + arm_compute_static_chain_stack_bytes ()
25236 - offsets->frame);
25238 /* Compute how many core registers saved and how far away the floats are. */
25239 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25240 if (saved_regs_mask & (1 << i))
25242 num_regs++;
25243 floats_from_frame += 4;
25246 if (TARGET_HARD_FLOAT)
25248 int start_reg;
25249 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25251 /* The offset is from IP_REGNUM. */
25252 int saved_size = arm_get_vfp_saved_size ();
25253 if (saved_size > 0)
25255 rtx_insn *insn;
25256 floats_from_frame += saved_size;
25257 insn = emit_insn (gen_addsi3 (ip_rtx,
25258 hard_frame_pointer_rtx,
25259 GEN_INT (-floats_from_frame)));
25260 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25261 ip_rtx, hard_frame_pointer_rtx);
25264 /* Generate VFP register multi-pop. */
25265 start_reg = FIRST_VFP_REGNUM;
25267 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25268 /* Look for a case where a reg does not need restoring. */
25269 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25270 && (!df_regs_ever_live_p (i + 1)
25271 || call_used_regs[i + 1]))
25273 if (start_reg != i)
25274 arm_emit_vfp_multi_reg_pop (start_reg,
25275 (i - start_reg) / 2,
25276 gen_rtx_REG (SImode,
25277 IP_REGNUM));
25278 start_reg = i + 2;
25281 /* Restore the remaining regs that we have discovered (or possibly
25282 even all of them, if the conditional in the for loop never
25283 fired). */
25284 if (start_reg != i)
25285 arm_emit_vfp_multi_reg_pop (start_reg,
25286 (i - start_reg) / 2,
25287 gen_rtx_REG (SImode, IP_REGNUM));
25290 if (TARGET_IWMMXT)
25292 /* The frame pointer is guaranteed to be non-double-word aligned, as
25293 it is set to double-word-aligned old_stack_pointer - 4. */
25294 rtx_insn *insn;
25295 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25297 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25298 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25300 rtx addr = gen_frame_mem (V2SImode,
25301 plus_constant (Pmode, hard_frame_pointer_rtx,
25302 - lrm_count * 4));
25303 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25304 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25305 gen_rtx_REG (V2SImode, i),
25306 NULL_RTX);
25307 lrm_count += 2;
25311 /* saved_regs_mask should contain IP which contains old stack pointer
25312 at the time of activation creation. Since SP and IP are adjacent registers,
25313 we can restore the value directly into SP. */
25314 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25315 saved_regs_mask &= ~(1 << IP_REGNUM);
25316 saved_regs_mask |= (1 << SP_REGNUM);
25318 /* There are two registers left in saved_regs_mask - LR and PC. We
25319 only need to restore LR (the return address), but to
25320 save time we can load it directly into PC, unless we need a
25321 special function exit sequence, or we are not really returning. */
25322 if (really_return
25323 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25324 && !crtl->calls_eh_return)
25325 /* Delete LR from the register mask, so that LR on
25326 the stack is loaded into the PC in the register mask. */
25327 saved_regs_mask &= ~(1 << LR_REGNUM);
25328 else
25329 saved_regs_mask &= ~(1 << PC_REGNUM);
25331 num_regs = bit_count (saved_regs_mask);
25332 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25334 rtx_insn *insn;
25335 emit_insn (gen_blockage ());
25336 /* Unwind the stack to just below the saved registers. */
25337 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25338 hard_frame_pointer_rtx,
25339 GEN_INT (- 4 * num_regs)));
25341 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25342 stack_pointer_rtx, hard_frame_pointer_rtx);
25345 arm_emit_multi_reg_pop (saved_regs_mask);
25347 if (IS_INTERRUPT (func_type))
25349 /* Interrupt handlers will have pushed the
25350 IP onto the stack, so restore it now. */
25351 rtx_insn *insn;
25352 rtx addr = gen_rtx_MEM (SImode,
25353 gen_rtx_POST_INC (SImode,
25354 stack_pointer_rtx));
25355 set_mem_alias_set (addr, get_frame_alias_set ());
25356 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25357 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25358 gen_rtx_REG (SImode, IP_REGNUM),
25359 NULL_RTX);
25362 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25363 return;
25365 if (crtl->calls_eh_return)
25366 emit_insn (gen_addsi3 (stack_pointer_rtx,
25367 stack_pointer_rtx,
25368 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25370 if (IS_STACKALIGN (func_type))
25371 /* Restore the original stack pointer. Before prologue, the stack was
25372 realigned and the original stack pointer saved in r0. For details,
25373 see comment in arm_expand_prologue. */
25374 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25376 emit_jump_insn (simple_return_rtx);
25379 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25380 function is not a sibcall. */
25381 void
25382 arm_expand_epilogue (bool really_return)
25384 unsigned long func_type;
25385 unsigned long saved_regs_mask;
25386 int num_regs = 0;
25387 int i;
25388 int amount;
25389 arm_stack_offsets *offsets;
25391 func_type = arm_current_func_type ();
25393 /* Naked functions don't have epilogue. Hence, generate return pattern, and
25394 let output_return_instruction take care of instruction emission if any. */
25395 if (IS_NAKED (func_type)
25396 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25398 if (really_return)
25399 emit_jump_insn (simple_return_rtx);
25400 return;
25403 /* If we are throwing an exception, then we really must be doing a
25404 return, so we can't tail-call. */
25405 gcc_assert (!crtl->calls_eh_return || really_return);
25407 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25409 arm_expand_epilogue_apcs_frame (really_return);
25410 return;
25413 /* Get frame offsets for ARM. */
25414 offsets = arm_get_frame_offsets ();
25415 saved_regs_mask = offsets->saved_regs_mask;
25416 num_regs = bit_count (saved_regs_mask);
25418 if (frame_pointer_needed)
25420 rtx_insn *insn;
25421 /* Restore stack pointer if necessary. */
25422 if (TARGET_ARM)
25424 /* In ARM mode, frame pointer points to first saved register.
25425 Restore stack pointer to last saved register. */
25426 amount = offsets->frame - offsets->saved_regs;
25428 /* Force out any pending memory operations that reference stacked data
25429 before stack de-allocation occurs. */
25430 emit_insn (gen_blockage ());
25431 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25432 hard_frame_pointer_rtx,
25433 GEN_INT (amount)));
25434 arm_add_cfa_adjust_cfa_note (insn, amount,
25435 stack_pointer_rtx,
25436 hard_frame_pointer_rtx);
25438 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25439 deleted. */
25440 emit_insn (gen_force_register_use (stack_pointer_rtx));
25442 else
25444 /* In Thumb-2 mode, the frame pointer points to the last saved
25445 register. */
25446 amount = offsets->locals_base - offsets->saved_regs;
25447 if (amount)
25449 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25450 hard_frame_pointer_rtx,
25451 GEN_INT (amount)));
25452 arm_add_cfa_adjust_cfa_note (insn, amount,
25453 hard_frame_pointer_rtx,
25454 hard_frame_pointer_rtx);
25457 /* Force out any pending memory operations that reference stacked data
25458 before stack de-allocation occurs. */
25459 emit_insn (gen_blockage ());
25460 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25461 hard_frame_pointer_rtx));
25462 arm_add_cfa_adjust_cfa_note (insn, 0,
25463 stack_pointer_rtx,
25464 hard_frame_pointer_rtx);
25465 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25466 deleted. */
25467 emit_insn (gen_force_register_use (stack_pointer_rtx));
25470 else
25472 /* Pop off outgoing args and local frame to adjust stack pointer to
25473 last saved register. */
25474 amount = offsets->outgoing_args - offsets->saved_regs;
25475 if (amount)
25477 rtx_insn *tmp;
25478 /* Force out any pending memory operations that reference stacked data
25479 before stack de-allocation occurs. */
25480 emit_insn (gen_blockage ());
25481 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25482 stack_pointer_rtx,
25483 GEN_INT (amount)));
25484 arm_add_cfa_adjust_cfa_note (tmp, amount,
25485 stack_pointer_rtx, stack_pointer_rtx);
25486 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25487 not deleted. */
25488 emit_insn (gen_force_register_use (stack_pointer_rtx));
25492 if (TARGET_HARD_FLOAT)
25494 /* Generate VFP register multi-pop. */
25495 int end_reg = LAST_VFP_REGNUM + 1;
25497 /* Scan the registers in reverse order. We need to match
25498 any groupings made in the prologue and generate matching
25499 vldm operations. The need to match groups is because,
25500 unlike pop, vldm can only do consecutive regs. */
25501 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25502 /* Look for a case where a reg does not need restoring. */
25503 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25504 && (!df_regs_ever_live_p (i + 1)
25505 || call_used_regs[i + 1]))
25507 /* Restore the regs discovered so far (from reg+2 to
25508 end_reg). */
25509 if (end_reg > i + 2)
25510 arm_emit_vfp_multi_reg_pop (i + 2,
25511 (end_reg - (i + 2)) / 2,
25512 stack_pointer_rtx);
25513 end_reg = i;
25516 /* Restore the remaining regs that we have discovered (or possibly
25517 even all of them, if the conditional in the for loop never
25518 fired). */
25519 if (end_reg > i + 2)
25520 arm_emit_vfp_multi_reg_pop (i + 2,
25521 (end_reg - (i + 2)) / 2,
25522 stack_pointer_rtx);
25525 if (TARGET_IWMMXT)
25526 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25527 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25529 rtx_insn *insn;
25530 rtx addr = gen_rtx_MEM (V2SImode,
25531 gen_rtx_POST_INC (SImode,
25532 stack_pointer_rtx));
25533 set_mem_alias_set (addr, get_frame_alias_set ());
25534 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25535 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25536 gen_rtx_REG (V2SImode, i),
25537 NULL_RTX);
25538 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25539 stack_pointer_rtx, stack_pointer_rtx);
25542 if (saved_regs_mask)
25544 rtx insn;
25545 bool return_in_pc = false;
25547 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25548 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25549 && !IS_STACKALIGN (func_type)
25550 && really_return
25551 && crtl->args.pretend_args_size == 0
25552 && saved_regs_mask & (1 << LR_REGNUM)
25553 && !crtl->calls_eh_return)
25555 saved_regs_mask &= ~(1 << LR_REGNUM);
25556 saved_regs_mask |= (1 << PC_REGNUM);
25557 return_in_pc = true;
25560 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25562 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25563 if (saved_regs_mask & (1 << i))
25565 rtx addr = gen_rtx_MEM (SImode,
25566 gen_rtx_POST_INC (SImode,
25567 stack_pointer_rtx));
25568 set_mem_alias_set (addr, get_frame_alias_set ());
25570 if (i == PC_REGNUM)
25572 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25573 XVECEXP (insn, 0, 0) = ret_rtx;
25574 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25575 addr);
25576 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25577 insn = emit_jump_insn (insn);
25579 else
25581 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25582 addr));
25583 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25584 gen_rtx_REG (SImode, i),
25585 NULL_RTX);
25586 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25587 stack_pointer_rtx,
25588 stack_pointer_rtx);
25592 else
25594 if (TARGET_LDRD
25595 && current_tune->prefer_ldrd_strd
25596 && !optimize_function_for_size_p (cfun))
25598 if (TARGET_THUMB2)
25599 thumb2_emit_ldrd_pop (saved_regs_mask);
25600 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25601 arm_emit_ldrd_pop (saved_regs_mask);
25602 else
25603 arm_emit_multi_reg_pop (saved_regs_mask);
25605 else
25606 arm_emit_multi_reg_pop (saved_regs_mask);
25609 if (return_in_pc)
25610 return;
25613 amount
25614 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25615 if (amount)
25617 int i, j;
25618 rtx dwarf = NULL_RTX;
25619 rtx_insn *tmp =
25620 emit_insn (gen_addsi3 (stack_pointer_rtx,
25621 stack_pointer_rtx,
25622 GEN_INT (amount)));
25624 RTX_FRAME_RELATED_P (tmp) = 1;
25626 if (cfun->machine->uses_anonymous_args)
25628 /* Restore pretend args. Refer arm_expand_prologue on how to save
25629 pretend_args in stack. */
25630 int num_regs = crtl->args.pretend_args_size / 4;
25631 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25632 for (j = 0, i = 0; j < num_regs; i++)
25633 if (saved_regs_mask & (1 << i))
25635 rtx reg = gen_rtx_REG (SImode, i);
25636 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25637 j++;
25639 REG_NOTES (tmp) = dwarf;
25641 arm_add_cfa_adjust_cfa_note (tmp, amount,
25642 stack_pointer_rtx, stack_pointer_rtx);
25645 if (!really_return)
25646 return;
25648 if (crtl->calls_eh_return)
25649 emit_insn (gen_addsi3 (stack_pointer_rtx,
25650 stack_pointer_rtx,
25651 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25653 if (IS_STACKALIGN (func_type))
25654 /* Restore the original stack pointer. Before prologue, the stack was
25655 realigned and the original stack pointer saved in r0. For details,
25656 see comment in arm_expand_prologue. */
25657 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25659 emit_jump_insn (simple_return_rtx);
25662 /* Implementation of insn prologue_thumb1_interwork. This is the first
25663 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25665 const char *
25666 thumb1_output_interwork (void)
25668 const char * name;
25669 FILE *f = asm_out_file;
25671 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25672 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25673 == SYMBOL_REF);
25674 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25676 /* Generate code sequence to switch us into Thumb mode. */
25677 /* The .code 32 directive has already been emitted by
25678 ASM_DECLARE_FUNCTION_NAME. */
25679 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25680 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25682 /* Generate a label, so that the debugger will notice the
25683 change in instruction sets. This label is also used by
25684 the assembler to bypass the ARM code when this function
25685 is called from a Thumb encoded function elsewhere in the
25686 same file. Hence the definition of STUB_NAME here must
25687 agree with the definition in gas/config/tc-arm.c. */
25689 #define STUB_NAME ".real_start_of"
25691 fprintf (f, "\t.code\t16\n");
25692 #ifdef ARM_PE
25693 if (arm_dllexport_name_p (name))
25694 name = arm_strip_name_encoding (name);
25695 #endif
25696 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25697 fprintf (f, "\t.thumb_func\n");
25698 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25700 return "";
25703 /* Handle the case of a double word load into a low register from
25704 a computed memory address. The computed address may involve a
25705 register which is overwritten by the load. */
25706 const char *
25707 thumb_load_double_from_address (rtx *operands)
25709 rtx addr;
25710 rtx base;
25711 rtx offset;
25712 rtx arg1;
25713 rtx arg2;
25715 gcc_assert (REG_P (operands[0]));
25716 gcc_assert (MEM_P (operands[1]));
25718 /* Get the memory address. */
25719 addr = XEXP (operands[1], 0);
25721 /* Work out how the memory address is computed. */
25722 switch (GET_CODE (addr))
25724 case REG:
25725 operands[2] = adjust_address (operands[1], SImode, 4);
25727 if (REGNO (operands[0]) == REGNO (addr))
25729 output_asm_insn ("ldr\t%H0, %2", operands);
25730 output_asm_insn ("ldr\t%0, %1", operands);
25732 else
25734 output_asm_insn ("ldr\t%0, %1", operands);
25735 output_asm_insn ("ldr\t%H0, %2", operands);
25737 break;
25739 case CONST:
25740 /* Compute <address> + 4 for the high order load. */
25741 operands[2] = adjust_address (operands[1], SImode, 4);
25743 output_asm_insn ("ldr\t%0, %1", operands);
25744 output_asm_insn ("ldr\t%H0, %2", operands);
25745 break;
25747 case PLUS:
25748 arg1 = XEXP (addr, 0);
25749 arg2 = XEXP (addr, 1);
25751 if (CONSTANT_P (arg1))
25752 base = arg2, offset = arg1;
25753 else
25754 base = arg1, offset = arg2;
25756 gcc_assert (REG_P (base));
25758 /* Catch the case of <address> = <reg> + <reg> */
25759 if (REG_P (offset))
25761 int reg_offset = REGNO (offset);
25762 int reg_base = REGNO (base);
25763 int reg_dest = REGNO (operands[0]);
25765 /* Add the base and offset registers together into the
25766 higher destination register. */
25767 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25768 reg_dest + 1, reg_base, reg_offset);
25770 /* Load the lower destination register from the address in
25771 the higher destination register. */
25772 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25773 reg_dest, reg_dest + 1);
25775 /* Load the higher destination register from its own address
25776 plus 4. */
25777 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25778 reg_dest + 1, reg_dest + 1);
25780 else
25782 /* Compute <address> + 4 for the high order load. */
25783 operands[2] = adjust_address (operands[1], SImode, 4);
25785 /* If the computed address is held in the low order register
25786 then load the high order register first, otherwise always
25787 load the low order register first. */
25788 if (REGNO (operands[0]) == REGNO (base))
25790 output_asm_insn ("ldr\t%H0, %2", operands);
25791 output_asm_insn ("ldr\t%0, %1", operands);
25793 else
25795 output_asm_insn ("ldr\t%0, %1", operands);
25796 output_asm_insn ("ldr\t%H0, %2", operands);
25799 break;
25801 case LABEL_REF:
25802 /* With no registers to worry about we can just load the value
25803 directly. */
25804 operands[2] = adjust_address (operands[1], SImode, 4);
25806 output_asm_insn ("ldr\t%H0, %2", operands);
25807 output_asm_insn ("ldr\t%0, %1", operands);
25808 break;
25810 default:
25811 gcc_unreachable ();
25814 return "";
25817 const char *
25818 thumb_output_move_mem_multiple (int n, rtx *operands)
25820 switch (n)
25822 case 2:
25823 if (REGNO (operands[4]) > REGNO (operands[5]))
25824 std::swap (operands[4], operands[5]);
25826 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25827 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25828 break;
25830 case 3:
25831 if (REGNO (operands[4]) > REGNO (operands[5]))
25832 std::swap (operands[4], operands[5]);
25833 if (REGNO (operands[5]) > REGNO (operands[6]))
25834 std::swap (operands[5], operands[6]);
25835 if (REGNO (operands[4]) > REGNO (operands[5]))
25836 std::swap (operands[4], operands[5]);
25838 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25839 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25840 break;
25842 default:
25843 gcc_unreachable ();
25846 return "";
25849 /* Output a call-via instruction for thumb state. */
25850 const char *
25851 thumb_call_via_reg (rtx reg)
25853 int regno = REGNO (reg);
25854 rtx *labelp;
25856 gcc_assert (regno < LR_REGNUM);
25858 /* If we are in the normal text section we can use a single instance
25859 per compilation unit. If we are doing function sections, then we need
25860 an entry per section, since we can't rely on reachability. */
25861 if (in_section == text_section)
25863 thumb_call_reg_needed = 1;
25865 if (thumb_call_via_label[regno] == NULL)
25866 thumb_call_via_label[regno] = gen_label_rtx ();
25867 labelp = thumb_call_via_label + regno;
25869 else
25871 if (cfun->machine->call_via[regno] == NULL)
25872 cfun->machine->call_via[regno] = gen_label_rtx ();
25873 labelp = cfun->machine->call_via + regno;
25876 output_asm_insn ("bl\t%a0", labelp);
25877 return "";
25880 /* Routines for generating rtl. */
25881 void
25882 thumb_expand_movmemqi (rtx *operands)
25884 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25885 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25886 HOST_WIDE_INT len = INTVAL (operands[2]);
25887 HOST_WIDE_INT offset = 0;
25889 while (len >= 12)
25891 emit_insn (gen_movmem12b (out, in, out, in));
25892 len -= 12;
25895 if (len >= 8)
25897 emit_insn (gen_movmem8b (out, in, out, in));
25898 len -= 8;
25901 if (len >= 4)
25903 rtx reg = gen_reg_rtx (SImode);
25904 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25905 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25906 len -= 4;
25907 offset += 4;
25910 if (len >= 2)
25912 rtx reg = gen_reg_rtx (HImode);
25913 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25914 plus_constant (Pmode, in,
25915 offset))));
25916 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25917 offset)),
25918 reg));
25919 len -= 2;
25920 offset += 2;
25923 if (len)
25925 rtx reg = gen_reg_rtx (QImode);
25926 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25927 plus_constant (Pmode, in,
25928 offset))));
25929 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25930 offset)),
25931 reg));
25935 void
25936 thumb_reload_out_hi (rtx *operands)
25938 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25941 /* Return the length of a function name prefix
25942 that starts with the character 'c'. */
25943 static int
25944 arm_get_strip_length (int c)
25946 switch (c)
25948 ARM_NAME_ENCODING_LENGTHS
25949 default: return 0;
25953 /* Return a pointer to a function's name with any
25954 and all prefix encodings stripped from it. */
25955 const char *
25956 arm_strip_name_encoding (const char *name)
25958 int skip;
25960 while ((skip = arm_get_strip_length (* name)))
25961 name += skip;
25963 return name;
25966 /* If there is a '*' anywhere in the name's prefix, then
25967 emit the stripped name verbatim, otherwise prepend an
25968 underscore if leading underscores are being used. */
25969 void
25970 arm_asm_output_labelref (FILE *stream, const char *name)
25972 int skip;
25973 int verbatim = 0;
25975 while ((skip = arm_get_strip_length (* name)))
25977 verbatim |= (*name == '*');
25978 name += skip;
25981 if (verbatim)
25982 fputs (name, stream);
25983 else
25984 asm_fprintf (stream, "%U%s", name);
25987 /* This function is used to emit an EABI tag and its associated value.
25988 We emit the numerical value of the tag in case the assembler does not
25989 support textual tags. (Eg gas prior to 2.20). If requested we include
25990 the tag name in a comment so that anyone reading the assembler output
25991 will know which tag is being set.
25993 This function is not static because arm-c.c needs it too. */
25995 void
25996 arm_emit_eabi_attribute (const char *name, int num, int val)
25998 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25999 if (flag_verbose_asm || flag_debug_asm)
26000 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26001 asm_fprintf (asm_out_file, "\n");
26004 /* This function is used to print CPU tuning information as comment
26005 in assembler file. Pointers are not printed for now. */
26007 void
26008 arm_print_tune_info (void)
26010 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
26011 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
26012 current_tune->constant_limit);
26013 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
26014 current_tune->max_insns_skipped);
26015 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
26016 current_tune->prefetch.num_slots);
26017 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
26018 current_tune->prefetch.l1_cache_size);
26019 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
26020 current_tune->prefetch.l1_cache_line_size);
26021 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
26022 (int) current_tune->prefer_constant_pool);
26023 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
26024 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
26025 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
26026 current_tune->branch_cost (false, false));
26027 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
26028 current_tune->branch_cost (false, true));
26029 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
26030 current_tune->branch_cost (true, false));
26031 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
26032 current_tune->branch_cost (true, true));
26033 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
26034 (int) current_tune->prefer_ldrd_strd);
26035 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
26036 (int) current_tune->logical_op_non_short_circuit_thumb,
26037 (int) current_tune->logical_op_non_short_circuit_arm);
26038 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
26039 (int) current_tune->prefer_neon_for_64bits);
26040 asm_fprintf (asm_out_file,
26041 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
26042 (int) current_tune->disparage_flag_setting_t16_encodings);
26043 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
26044 (int) current_tune->string_ops_prefer_neon);
26045 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
26046 current_tune->max_insns_inline_memset);
26047 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
26048 current_tune->fusible_ops);
26049 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
26050 (int) current_tune->sched_autopref);
26053 static void
26054 arm_file_start (void)
26056 int val;
26058 if (TARGET_BPABI)
26060 if (arm_selected_arch)
26062 /* armv7ve doesn't support any extensions. */
26063 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
26065 /* Keep backward compatability for assemblers
26066 which don't support armv7ve. */
26067 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26068 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26069 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26070 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26071 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26073 else
26075 const char* pos = strchr (arm_selected_arch->name, '+');
26076 if (pos)
26078 char buf[32];
26079 gcc_assert (strlen (arm_selected_arch->name)
26080 <= sizeof (buf) / sizeof (*pos));
26081 strncpy (buf, arm_selected_arch->name,
26082 (pos - arm_selected_arch->name) * sizeof (*pos));
26083 buf[pos - arm_selected_arch->name] = '\0';
26084 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
26085 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
26087 else
26088 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
26091 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
26092 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
26093 else
26095 const char* truncated_name
26096 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
26097 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26100 if (print_tune_info)
26101 arm_print_tune_info ();
26103 if (! TARGET_SOFT_FLOAT)
26105 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26106 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26108 if (TARGET_HARD_FLOAT_ABI)
26109 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26112 /* Some of these attributes only apply when the corresponding features
26113 are used. However we don't have any easy way of figuring this out.
26114 Conservatively record the setting that would have been used. */
26116 if (flag_rounding_math)
26117 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26119 if (!flag_unsafe_math_optimizations)
26121 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26122 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26124 if (flag_signaling_nans)
26125 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26127 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26128 flag_finite_math_only ? 1 : 3);
26130 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26131 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26132 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26133 flag_short_enums ? 1 : 2);
26135 /* Tag_ABI_optimization_goals. */
26136 if (optimize_size)
26137 val = 4;
26138 else if (optimize >= 2)
26139 val = 2;
26140 else if (optimize)
26141 val = 1;
26142 else
26143 val = 6;
26144 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26146 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26147 unaligned_access);
26149 if (arm_fp16_format)
26150 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26151 (int) arm_fp16_format);
26153 if (arm_lang_output_object_attributes_hook)
26154 arm_lang_output_object_attributes_hook();
26157 default_file_start ();
26160 static void
26161 arm_file_end (void)
26163 int regno;
26165 if (NEED_INDICATE_EXEC_STACK)
26166 /* Add .note.GNU-stack. */
26167 file_end_indicate_exec_stack ();
26169 if (! thumb_call_reg_needed)
26170 return;
26172 switch_to_section (text_section);
26173 asm_fprintf (asm_out_file, "\t.code 16\n");
26174 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26176 for (regno = 0; regno < LR_REGNUM; regno++)
26178 rtx label = thumb_call_via_label[regno];
26180 if (label != 0)
26182 targetm.asm_out.internal_label (asm_out_file, "L",
26183 CODE_LABEL_NUMBER (label));
26184 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26189 #ifndef ARM_PE
26190 /* Symbols in the text segment can be accessed without indirecting via the
26191 constant pool; it may take an extra binary operation, but this is still
26192 faster than indirecting via memory. Don't do this when not optimizing,
26193 since we won't be calculating al of the offsets necessary to do this
26194 simplification. */
26196 static void
26197 arm_encode_section_info (tree decl, rtx rtl, int first)
26199 if (optimize > 0 && TREE_CONSTANT (decl))
26200 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26202 default_encode_section_info (decl, rtl, first);
26204 #endif /* !ARM_PE */
26206 static void
26207 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26209 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26210 && !strcmp (prefix, "L"))
26212 arm_ccfsm_state = 0;
26213 arm_target_insn = NULL;
26215 default_internal_label (stream, prefix, labelno);
26218 /* Output code to add DELTA to the first argument, and then jump
26219 to FUNCTION. Used for C++ multiple inheritance. */
26221 static void
26222 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26223 HOST_WIDE_INT, tree function)
26225 static int thunk_label = 0;
26226 char label[256];
26227 char labelpc[256];
26228 int mi_delta = delta;
26229 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26230 int shift = 0;
26231 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26232 ? 1 : 0);
26233 if (mi_delta < 0)
26234 mi_delta = - mi_delta;
26236 final_start_function (emit_barrier (), file, 1);
26238 if (TARGET_THUMB1)
26240 int labelno = thunk_label++;
26241 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26242 /* Thunks are entered in arm mode when avaiable. */
26243 if (TARGET_THUMB1_ONLY)
26245 /* push r3 so we can use it as a temporary. */
26246 /* TODO: Omit this save if r3 is not used. */
26247 fputs ("\tpush {r3}\n", file);
26248 fputs ("\tldr\tr3, ", file);
26250 else
26252 fputs ("\tldr\tr12, ", file);
26254 assemble_name (file, label);
26255 fputc ('\n', file);
26256 if (flag_pic)
26258 /* If we are generating PIC, the ldr instruction below loads
26259 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26260 the address of the add + 8, so we have:
26262 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26263 = target + 1.
26265 Note that we have "+ 1" because some versions of GNU ld
26266 don't set the low bit of the result for R_ARM_REL32
26267 relocations against thumb function symbols.
26268 On ARMv6M this is +4, not +8. */
26269 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26270 assemble_name (file, labelpc);
26271 fputs (":\n", file);
26272 if (TARGET_THUMB1_ONLY)
26274 /* This is 2 insns after the start of the thunk, so we know it
26275 is 4-byte aligned. */
26276 fputs ("\tadd\tr3, pc, r3\n", file);
26277 fputs ("\tmov r12, r3\n", file);
26279 else
26280 fputs ("\tadd\tr12, pc, r12\n", file);
26282 else if (TARGET_THUMB1_ONLY)
26283 fputs ("\tmov r12, r3\n", file);
26285 if (TARGET_THUMB1_ONLY)
26287 if (mi_delta > 255)
26289 fputs ("\tldr\tr3, ", file);
26290 assemble_name (file, label);
26291 fputs ("+4\n", file);
26292 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26293 mi_op, this_regno, this_regno);
26295 else if (mi_delta != 0)
26297 /* Thumb1 unified syntax requires s suffix in instruction name when
26298 one of the operands is immediate. */
26299 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26300 mi_op, this_regno, this_regno,
26301 mi_delta);
26304 else
26306 /* TODO: Use movw/movt for large constants when available. */
26307 while (mi_delta != 0)
26309 if ((mi_delta & (3 << shift)) == 0)
26310 shift += 2;
26311 else
26313 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26314 mi_op, this_regno, this_regno,
26315 mi_delta & (0xff << shift));
26316 mi_delta &= ~(0xff << shift);
26317 shift += 8;
26321 if (TARGET_THUMB1)
26323 if (TARGET_THUMB1_ONLY)
26324 fputs ("\tpop\t{r3}\n", file);
26326 fprintf (file, "\tbx\tr12\n");
26327 ASM_OUTPUT_ALIGN (file, 2);
26328 assemble_name (file, label);
26329 fputs (":\n", file);
26330 if (flag_pic)
26332 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26333 rtx tem = XEXP (DECL_RTL (function), 0);
26334 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26335 pipeline offset is four rather than eight. Adjust the offset
26336 accordingly. */
26337 tem = plus_constant (GET_MODE (tem), tem,
26338 TARGET_THUMB1_ONLY ? -3 : -7);
26339 tem = gen_rtx_MINUS (GET_MODE (tem),
26340 tem,
26341 gen_rtx_SYMBOL_REF (Pmode,
26342 ggc_strdup (labelpc)));
26343 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26345 else
26346 /* Output ".word .LTHUNKn". */
26347 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26349 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26350 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26352 else
26354 fputs ("\tb\t", file);
26355 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26356 if (NEED_PLT_RELOC)
26357 fputs ("(PLT)", file);
26358 fputc ('\n', file);
26361 final_end_function ();
26364 /* MI thunk handling for TARGET_32BIT. */
26366 static void
26367 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26368 HOST_WIDE_INT vcall_offset, tree function)
26370 /* On ARM, this_regno is R0 or R1 depending on
26371 whether the function returns an aggregate or not.
26373 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26374 function)
26375 ? R1_REGNUM : R0_REGNUM);
26377 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26378 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26379 reload_completed = 1;
26380 emit_note (NOTE_INSN_PROLOGUE_END);
26382 /* Add DELTA to THIS_RTX. */
26383 if (delta != 0)
26384 arm_split_constant (PLUS, Pmode, NULL_RTX,
26385 delta, this_rtx, this_rtx, false);
26387 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26388 if (vcall_offset != 0)
26390 /* Load *THIS_RTX. */
26391 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26392 /* Compute *THIS_RTX + VCALL_OFFSET. */
26393 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26394 false);
26395 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26396 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26397 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26400 /* Generate a tail call to the target function. */
26401 if (!TREE_USED (function))
26403 assemble_external (function);
26404 TREE_USED (function) = 1;
26406 rtx funexp = XEXP (DECL_RTL (function), 0);
26407 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26408 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26409 SIBLING_CALL_P (insn) = 1;
26411 insn = get_insns ();
26412 shorten_branches (insn);
26413 final_start_function (insn, file, 1);
26414 final (insn, file, 1);
26415 final_end_function ();
26417 /* Stop pretending this is a post-reload pass. */
26418 reload_completed = 0;
26421 /* Output code to add DELTA to the first argument, and then jump
26422 to FUNCTION. Used for C++ multiple inheritance. */
26424 static void
26425 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26426 HOST_WIDE_INT vcall_offset, tree function)
26428 if (TARGET_32BIT)
26429 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26430 else
26431 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26435 arm_emit_vector_const (FILE *file, rtx x)
26437 int i;
26438 const char * pattern;
26440 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26442 switch (GET_MODE (x))
26444 case V2SImode: pattern = "%08x"; break;
26445 case V4HImode: pattern = "%04x"; break;
26446 case V8QImode: pattern = "%02x"; break;
26447 default: gcc_unreachable ();
26450 fprintf (file, "0x");
26451 for (i = CONST_VECTOR_NUNITS (x); i--;)
26453 rtx element;
26455 element = CONST_VECTOR_ELT (x, i);
26456 fprintf (file, pattern, INTVAL (element));
26459 return 1;
26462 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26463 HFmode constant pool entries are actually loaded with ldr. */
26464 void
26465 arm_emit_fp16_const (rtx c)
26467 long bits;
26469 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26470 if (WORDS_BIG_ENDIAN)
26471 assemble_zeros (2);
26472 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26473 if (!WORDS_BIG_ENDIAN)
26474 assemble_zeros (2);
26477 const char *
26478 arm_output_load_gr (rtx *operands)
26480 rtx reg;
26481 rtx offset;
26482 rtx wcgr;
26483 rtx sum;
26485 if (!MEM_P (operands [1])
26486 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26487 || !REG_P (reg = XEXP (sum, 0))
26488 || !CONST_INT_P (offset = XEXP (sum, 1))
26489 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26490 return "wldrw%?\t%0, %1";
26492 /* Fix up an out-of-range load of a GR register. */
26493 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26494 wcgr = operands[0];
26495 operands[0] = reg;
26496 output_asm_insn ("ldr%?\t%0, %1", operands);
26498 operands[0] = wcgr;
26499 operands[1] = reg;
26500 output_asm_insn ("tmcr%?\t%0, %1", operands);
26501 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26503 return "";
26506 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26508 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26509 named arg and all anonymous args onto the stack.
26510 XXX I know the prologue shouldn't be pushing registers, but it is faster
26511 that way. */
26513 static void
26514 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26515 machine_mode mode,
26516 tree type,
26517 int *pretend_size,
26518 int second_time ATTRIBUTE_UNUSED)
26520 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26521 int nregs;
26523 cfun->machine->uses_anonymous_args = 1;
26524 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26526 nregs = pcum->aapcs_ncrn;
26527 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26528 nregs++;
26530 else
26531 nregs = pcum->nregs;
26533 if (nregs < NUM_ARG_REGS)
26534 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26537 /* We can't rely on the caller doing the proper promotion when
26538 using APCS or ATPCS. */
26540 static bool
26541 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26543 return !TARGET_AAPCS_BASED;
26546 static machine_mode
26547 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26548 machine_mode mode,
26549 int *punsignedp ATTRIBUTE_UNUSED,
26550 const_tree fntype ATTRIBUTE_UNUSED,
26551 int for_return ATTRIBUTE_UNUSED)
26553 if (GET_MODE_CLASS (mode) == MODE_INT
26554 && GET_MODE_SIZE (mode) < 4)
26555 return SImode;
26557 return mode;
26560 /* AAPCS based ABIs use short enums by default. */
26562 static bool
26563 arm_default_short_enums (void)
26565 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26569 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26571 static bool
26572 arm_align_anon_bitfield (void)
26574 return TARGET_AAPCS_BASED;
26578 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26580 static tree
26581 arm_cxx_guard_type (void)
26583 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26587 /* The EABI says test the least significant bit of a guard variable. */
26589 static bool
26590 arm_cxx_guard_mask_bit (void)
26592 return TARGET_AAPCS_BASED;
26596 /* The EABI specifies that all array cookies are 8 bytes long. */
26598 static tree
26599 arm_get_cookie_size (tree type)
26601 tree size;
26603 if (!TARGET_AAPCS_BASED)
26604 return default_cxx_get_cookie_size (type);
26606 size = build_int_cst (sizetype, 8);
26607 return size;
26611 /* The EABI says that array cookies should also contain the element size. */
26613 static bool
26614 arm_cookie_has_size (void)
26616 return TARGET_AAPCS_BASED;
26620 /* The EABI says constructors and destructors should return a pointer to
26621 the object constructed/destroyed. */
26623 static bool
26624 arm_cxx_cdtor_returns_this (void)
26626 return TARGET_AAPCS_BASED;
26629 /* The EABI says that an inline function may never be the key
26630 method. */
26632 static bool
26633 arm_cxx_key_method_may_be_inline (void)
26635 return !TARGET_AAPCS_BASED;
26638 static void
26639 arm_cxx_determine_class_data_visibility (tree decl)
26641 if (!TARGET_AAPCS_BASED
26642 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26643 return;
26645 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26646 is exported. However, on systems without dynamic vague linkage,
26647 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26648 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26649 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26650 else
26651 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26652 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26655 static bool
26656 arm_cxx_class_data_always_comdat (void)
26658 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26659 vague linkage if the class has no key function. */
26660 return !TARGET_AAPCS_BASED;
26664 /* The EABI says __aeabi_atexit should be used to register static
26665 destructors. */
26667 static bool
26668 arm_cxx_use_aeabi_atexit (void)
26670 return TARGET_AAPCS_BASED;
26674 void
26675 arm_set_return_address (rtx source, rtx scratch)
26677 arm_stack_offsets *offsets;
26678 HOST_WIDE_INT delta;
26679 rtx addr;
26680 unsigned long saved_regs;
26682 offsets = arm_get_frame_offsets ();
26683 saved_regs = offsets->saved_regs_mask;
26685 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26686 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26687 else
26689 if (frame_pointer_needed)
26690 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26691 else
26693 /* LR will be the first saved register. */
26694 delta = offsets->outgoing_args - (offsets->frame + 4);
26697 if (delta >= 4096)
26699 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26700 GEN_INT (delta & ~4095)));
26701 addr = scratch;
26702 delta &= 4095;
26704 else
26705 addr = stack_pointer_rtx;
26707 addr = plus_constant (Pmode, addr, delta);
26709 /* The store needs to be marked as frame related in order to prevent
26710 DSE from deleting it as dead if it is based on fp. */
26711 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26712 RTX_FRAME_RELATED_P (insn) = 1;
26713 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26718 void
26719 thumb_set_return_address (rtx source, rtx scratch)
26721 arm_stack_offsets *offsets;
26722 HOST_WIDE_INT delta;
26723 HOST_WIDE_INT limit;
26724 int reg;
26725 rtx addr;
26726 unsigned long mask;
26728 emit_use (source);
26730 offsets = arm_get_frame_offsets ();
26731 mask = offsets->saved_regs_mask;
26732 if (mask & (1 << LR_REGNUM))
26734 limit = 1024;
26735 /* Find the saved regs. */
26736 if (frame_pointer_needed)
26738 delta = offsets->soft_frame - offsets->saved_args;
26739 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26740 if (TARGET_THUMB1)
26741 limit = 128;
26743 else
26745 delta = offsets->outgoing_args - offsets->saved_args;
26746 reg = SP_REGNUM;
26748 /* Allow for the stack frame. */
26749 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26750 delta -= 16;
26751 /* The link register is always the first saved register. */
26752 delta -= 4;
26754 /* Construct the address. */
26755 addr = gen_rtx_REG (SImode, reg);
26756 if (delta > limit)
26758 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26759 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26760 addr = scratch;
26762 else
26763 addr = plus_constant (Pmode, addr, delta);
26765 /* The store needs to be marked as frame related in order to prevent
26766 DSE from deleting it as dead if it is based on fp. */
26767 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26768 RTX_FRAME_RELATED_P (insn) = 1;
26769 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26771 else
26772 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26775 /* Implements target hook vector_mode_supported_p. */
26776 bool
26777 arm_vector_mode_supported_p (machine_mode mode)
26779 /* Neon also supports V2SImode, etc. listed in the clause below. */
26780 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26781 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26782 || mode == V2DImode || mode == V8HFmode))
26783 return true;
26785 if ((TARGET_NEON || TARGET_IWMMXT)
26786 && ((mode == V2SImode)
26787 || (mode == V4HImode)
26788 || (mode == V8QImode)))
26789 return true;
26791 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26792 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26793 || mode == V2HAmode))
26794 return true;
26796 return false;
26799 /* Implements target hook array_mode_supported_p. */
26801 static bool
26802 arm_array_mode_supported_p (machine_mode mode,
26803 unsigned HOST_WIDE_INT nelems)
26805 if (TARGET_NEON
26806 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26807 && (nelems >= 2 && nelems <= 4))
26808 return true;
26810 return false;
26813 /* Use the option -mvectorize-with-neon-double to override the use of quardword
26814 registers when autovectorizing for Neon, at least until multiple vector
26815 widths are supported properly by the middle-end. */
26817 static machine_mode
26818 arm_preferred_simd_mode (machine_mode mode)
26820 if (TARGET_NEON)
26821 switch (mode)
26823 case SFmode:
26824 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26825 case SImode:
26826 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26827 case HImode:
26828 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26829 case QImode:
26830 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26831 case DImode:
26832 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26833 return V2DImode;
26834 break;
26836 default:;
26839 if (TARGET_REALLY_IWMMXT)
26840 switch (mode)
26842 case SImode:
26843 return V2SImode;
26844 case HImode:
26845 return V4HImode;
26846 case QImode:
26847 return V8QImode;
26849 default:;
26852 return word_mode;
26855 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26857 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26858 using r0-r4 for function arguments, r7 for the stack frame and don't have
26859 enough left over to do doubleword arithmetic. For Thumb-2 all the
26860 potentially problematic instructions accept high registers so this is not
26861 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26862 that require many low registers. */
26863 static bool
26864 arm_class_likely_spilled_p (reg_class_t rclass)
26866 if ((TARGET_THUMB1 && rclass == LO_REGS)
26867 || rclass == CC_REG)
26868 return true;
26870 return false;
26873 /* Implements target hook small_register_classes_for_mode_p. */
26874 bool
26875 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26877 return TARGET_THUMB1;
26880 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26881 ARM insns and therefore guarantee that the shift count is modulo 256.
26882 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26883 guarantee no particular behavior for out-of-range counts. */
26885 static unsigned HOST_WIDE_INT
26886 arm_shift_truncation_mask (machine_mode mode)
26888 return mode == SImode ? 255 : 0;
26892 /* Map internal gcc register numbers to DWARF2 register numbers. */
26894 unsigned int
26895 arm_dbx_register_number (unsigned int regno)
26897 if (regno < 16)
26898 return regno;
26900 if (IS_VFP_REGNUM (regno))
26902 /* See comment in arm_dwarf_register_span. */
26903 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26904 return 64 + regno - FIRST_VFP_REGNUM;
26905 else
26906 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26909 if (IS_IWMMXT_GR_REGNUM (regno))
26910 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26912 if (IS_IWMMXT_REGNUM (regno))
26913 return 112 + regno - FIRST_IWMMXT_REGNUM;
26915 return DWARF_FRAME_REGISTERS;
26918 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26919 GCC models tham as 64 32-bit registers, so we need to describe this to
26920 the DWARF generation code. Other registers can use the default. */
26921 static rtx
26922 arm_dwarf_register_span (rtx rtl)
26924 machine_mode mode;
26925 unsigned regno;
26926 rtx parts[16];
26927 int nregs;
26928 int i;
26930 regno = REGNO (rtl);
26931 if (!IS_VFP_REGNUM (regno))
26932 return NULL_RTX;
26934 /* XXX FIXME: The EABI defines two VFP register ranges:
26935 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26936 256-287: D0-D31
26937 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26938 corresponding D register. Until GDB supports this, we shall use the
26939 legacy encodings. We also use these encodings for D0-D15 for
26940 compatibility with older debuggers. */
26941 mode = GET_MODE (rtl);
26942 if (GET_MODE_SIZE (mode) < 8)
26943 return NULL_RTX;
26945 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26947 nregs = GET_MODE_SIZE (mode) / 4;
26948 for (i = 0; i < nregs; i += 2)
26949 if (TARGET_BIG_END)
26951 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26952 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26954 else
26956 parts[i] = gen_rtx_REG (SImode, regno + i);
26957 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26960 else
26962 nregs = GET_MODE_SIZE (mode) / 8;
26963 for (i = 0; i < nregs; i++)
26964 parts[i] = gen_rtx_REG (DImode, regno + i);
26967 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26970 #if ARM_UNWIND_INFO
26971 /* Emit unwind directives for a store-multiple instruction or stack pointer
26972 push during alignment.
26973 These should only ever be generated by the function prologue code, so
26974 expect them to have a particular form.
26975 The store-multiple instruction sometimes pushes pc as the last register,
26976 although it should not be tracked into unwind information, or for -Os
26977 sometimes pushes some dummy registers before first register that needs
26978 to be tracked in unwind information; such dummy registers are there just
26979 to avoid separate stack adjustment, and will not be restored in the
26980 epilogue. */
26982 static void
26983 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26985 int i;
26986 HOST_WIDE_INT offset;
26987 HOST_WIDE_INT nregs;
26988 int reg_size;
26989 unsigned reg;
26990 unsigned lastreg;
26991 unsigned padfirst = 0, padlast = 0;
26992 rtx e;
26994 e = XVECEXP (p, 0, 0);
26995 gcc_assert (GET_CODE (e) == SET);
26997 /* First insn will adjust the stack pointer. */
26998 gcc_assert (GET_CODE (e) == SET
26999 && REG_P (SET_DEST (e))
27000 && REGNO (SET_DEST (e)) == SP_REGNUM
27001 && GET_CODE (SET_SRC (e)) == PLUS);
27003 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27004 nregs = XVECLEN (p, 0) - 1;
27005 gcc_assert (nregs);
27007 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27008 if (reg < 16)
27010 /* For -Os dummy registers can be pushed at the beginning to
27011 avoid separate stack pointer adjustment. */
27012 e = XVECEXP (p, 0, 1);
27013 e = XEXP (SET_DEST (e), 0);
27014 if (GET_CODE (e) == PLUS)
27015 padfirst = INTVAL (XEXP (e, 1));
27016 gcc_assert (padfirst == 0 || optimize_size);
27017 /* The function prologue may also push pc, but not annotate it as it is
27018 never restored. We turn this into a stack pointer adjustment. */
27019 e = XVECEXP (p, 0, nregs);
27020 e = XEXP (SET_DEST (e), 0);
27021 if (GET_CODE (e) == PLUS)
27022 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27023 else
27024 padlast = offset - 4;
27025 gcc_assert (padlast == 0 || padlast == 4);
27026 if (padlast == 4)
27027 fprintf (asm_out_file, "\t.pad #4\n");
27028 reg_size = 4;
27029 fprintf (asm_out_file, "\t.save {");
27031 else if (IS_VFP_REGNUM (reg))
27033 reg_size = 8;
27034 fprintf (asm_out_file, "\t.vsave {");
27036 else
27037 /* Unknown register type. */
27038 gcc_unreachable ();
27040 /* If the stack increment doesn't match the size of the saved registers,
27041 something has gone horribly wrong. */
27042 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27044 offset = padfirst;
27045 lastreg = 0;
27046 /* The remaining insns will describe the stores. */
27047 for (i = 1; i <= nregs; i++)
27049 /* Expect (set (mem <addr>) (reg)).
27050 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27051 e = XVECEXP (p, 0, i);
27052 gcc_assert (GET_CODE (e) == SET
27053 && MEM_P (SET_DEST (e))
27054 && REG_P (SET_SRC (e)));
27056 reg = REGNO (SET_SRC (e));
27057 gcc_assert (reg >= lastreg);
27059 if (i != 1)
27060 fprintf (asm_out_file, ", ");
27061 /* We can't use %r for vfp because we need to use the
27062 double precision register names. */
27063 if (IS_VFP_REGNUM (reg))
27064 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27065 else
27066 asm_fprintf (asm_out_file, "%r", reg);
27068 if (flag_checking)
27070 /* Check that the addresses are consecutive. */
27071 e = XEXP (SET_DEST (e), 0);
27072 if (GET_CODE (e) == PLUS)
27073 gcc_assert (REG_P (XEXP (e, 0))
27074 && REGNO (XEXP (e, 0)) == SP_REGNUM
27075 && CONST_INT_P (XEXP (e, 1))
27076 && offset == INTVAL (XEXP (e, 1)));
27077 else
27078 gcc_assert (i == 1
27079 && REG_P (e)
27080 && REGNO (e) == SP_REGNUM);
27081 offset += reg_size;
27084 fprintf (asm_out_file, "}\n");
27085 if (padfirst)
27086 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27089 /* Emit unwind directives for a SET. */
27091 static void
27092 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27094 rtx e0;
27095 rtx e1;
27096 unsigned reg;
27098 e0 = XEXP (p, 0);
27099 e1 = XEXP (p, 1);
27100 switch (GET_CODE (e0))
27102 case MEM:
27103 /* Pushing a single register. */
27104 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27105 || !REG_P (XEXP (XEXP (e0, 0), 0))
27106 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27107 abort ();
27109 asm_fprintf (asm_out_file, "\t.save ");
27110 if (IS_VFP_REGNUM (REGNO (e1)))
27111 asm_fprintf(asm_out_file, "{d%d}\n",
27112 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27113 else
27114 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27115 break;
27117 case REG:
27118 if (REGNO (e0) == SP_REGNUM)
27120 /* A stack increment. */
27121 if (GET_CODE (e1) != PLUS
27122 || !REG_P (XEXP (e1, 0))
27123 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27124 || !CONST_INT_P (XEXP (e1, 1)))
27125 abort ();
27127 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27128 -INTVAL (XEXP (e1, 1)));
27130 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27132 HOST_WIDE_INT offset;
27134 if (GET_CODE (e1) == PLUS)
27136 if (!REG_P (XEXP (e1, 0))
27137 || !CONST_INT_P (XEXP (e1, 1)))
27138 abort ();
27139 reg = REGNO (XEXP (e1, 0));
27140 offset = INTVAL (XEXP (e1, 1));
27141 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27142 HARD_FRAME_POINTER_REGNUM, reg,
27143 offset);
27145 else if (REG_P (e1))
27147 reg = REGNO (e1);
27148 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27149 HARD_FRAME_POINTER_REGNUM, reg);
27151 else
27152 abort ();
27154 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27156 /* Move from sp to reg. */
27157 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27159 else if (GET_CODE (e1) == PLUS
27160 && REG_P (XEXP (e1, 0))
27161 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27162 && CONST_INT_P (XEXP (e1, 1)))
27164 /* Set reg to offset from sp. */
27165 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27166 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27168 else
27169 abort ();
27170 break;
27172 default:
27173 abort ();
27178 /* Emit unwind directives for the given insn. */
27180 static void
27181 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27183 rtx note, pat;
27184 bool handled_one = false;
27186 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27187 return;
27189 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27190 && (TREE_NOTHROW (current_function_decl)
27191 || crtl->all_throwers_are_sibcalls))
27192 return;
27194 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27195 return;
27197 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27199 switch (REG_NOTE_KIND (note))
27201 case REG_FRAME_RELATED_EXPR:
27202 pat = XEXP (note, 0);
27203 goto found;
27205 case REG_CFA_REGISTER:
27206 pat = XEXP (note, 0);
27207 if (pat == NULL)
27209 pat = PATTERN (insn);
27210 if (GET_CODE (pat) == PARALLEL)
27211 pat = XVECEXP (pat, 0, 0);
27214 /* Only emitted for IS_STACKALIGN re-alignment. */
27216 rtx dest, src;
27217 unsigned reg;
27219 src = SET_SRC (pat);
27220 dest = SET_DEST (pat);
27222 gcc_assert (src == stack_pointer_rtx);
27223 reg = REGNO (dest);
27224 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27225 reg + 0x90, reg);
27227 handled_one = true;
27228 break;
27230 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
27231 to get correct dwarf information for shrink-wrap. We should not
27232 emit unwind information for it because these are used either for
27233 pretend arguments or notes to adjust sp and restore registers from
27234 stack. */
27235 case REG_CFA_DEF_CFA:
27236 case REG_CFA_ADJUST_CFA:
27237 case REG_CFA_RESTORE:
27238 return;
27240 case REG_CFA_EXPRESSION:
27241 case REG_CFA_OFFSET:
27242 /* ??? Only handling here what we actually emit. */
27243 gcc_unreachable ();
27245 default:
27246 break;
27249 if (handled_one)
27250 return;
27251 pat = PATTERN (insn);
27252 found:
27254 switch (GET_CODE (pat))
27256 case SET:
27257 arm_unwind_emit_set (asm_out_file, pat);
27258 break;
27260 case SEQUENCE:
27261 /* Store multiple. */
27262 arm_unwind_emit_sequence (asm_out_file, pat);
27263 break;
27265 default:
27266 abort();
27271 /* Output a reference from a function exception table to the type_info
27272 object X. The EABI specifies that the symbol should be relocated by
27273 an R_ARM_TARGET2 relocation. */
27275 static bool
27276 arm_output_ttype (rtx x)
27278 fputs ("\t.word\t", asm_out_file);
27279 output_addr_const (asm_out_file, x);
27280 /* Use special relocations for symbol references. */
27281 if (!CONST_INT_P (x))
27282 fputs ("(TARGET2)", asm_out_file);
27283 fputc ('\n', asm_out_file);
27285 return TRUE;
27288 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27290 static void
27291 arm_asm_emit_except_personality (rtx personality)
27293 fputs ("\t.personality\t", asm_out_file);
27294 output_addr_const (asm_out_file, personality);
27295 fputc ('\n', asm_out_file);
27297 #endif /* ARM_UNWIND_INFO */
27299 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27301 static void
27302 arm_asm_init_sections (void)
27304 #if ARM_UNWIND_INFO
27305 exception_section = get_unnamed_section (0, output_section_asm_op,
27306 "\t.handlerdata");
27307 #endif /* ARM_UNWIND_INFO */
27309 #ifdef OBJECT_FORMAT_ELF
27310 if (target_pure_code)
27311 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27312 #endif
27315 /* Output unwind directives for the start/end of a function. */
27317 void
27318 arm_output_fn_unwind (FILE * f, bool prologue)
27320 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27321 return;
27323 if (prologue)
27324 fputs ("\t.fnstart\n", f);
27325 else
27327 /* If this function will never be unwound, then mark it as such.
27328 The came condition is used in arm_unwind_emit to suppress
27329 the frame annotations. */
27330 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27331 && (TREE_NOTHROW (current_function_decl)
27332 || crtl->all_throwers_are_sibcalls))
27333 fputs("\t.cantunwind\n", f);
27335 fputs ("\t.fnend\n", f);
27339 static bool
27340 arm_emit_tls_decoration (FILE *fp, rtx x)
27342 enum tls_reloc reloc;
27343 rtx val;
27345 val = XVECEXP (x, 0, 0);
27346 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27348 output_addr_const (fp, val);
27350 switch (reloc)
27352 case TLS_GD32:
27353 fputs ("(tlsgd)", fp);
27354 break;
27355 case TLS_LDM32:
27356 fputs ("(tlsldm)", fp);
27357 break;
27358 case TLS_LDO32:
27359 fputs ("(tlsldo)", fp);
27360 break;
27361 case TLS_IE32:
27362 fputs ("(gottpoff)", fp);
27363 break;
27364 case TLS_LE32:
27365 fputs ("(tpoff)", fp);
27366 break;
27367 case TLS_DESCSEQ:
27368 fputs ("(tlsdesc)", fp);
27369 break;
27370 default:
27371 gcc_unreachable ();
27374 switch (reloc)
27376 case TLS_GD32:
27377 case TLS_LDM32:
27378 case TLS_IE32:
27379 case TLS_DESCSEQ:
27380 fputs (" + (. - ", fp);
27381 output_addr_const (fp, XVECEXP (x, 0, 2));
27382 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27383 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27384 output_addr_const (fp, XVECEXP (x, 0, 3));
27385 fputc (')', fp);
27386 break;
27387 default:
27388 break;
27391 return TRUE;
27394 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27396 static void
27397 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27399 gcc_assert (size == 4);
27400 fputs ("\t.word\t", file);
27401 output_addr_const (file, x);
27402 fputs ("(tlsldo)", file);
27405 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27407 static bool
27408 arm_output_addr_const_extra (FILE *fp, rtx x)
27410 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27411 return arm_emit_tls_decoration (fp, x);
27412 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27414 char label[256];
27415 int labelno = INTVAL (XVECEXP (x, 0, 0));
27417 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27418 assemble_name_raw (fp, label);
27420 return TRUE;
27422 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27424 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27425 if (GOT_PCREL)
27426 fputs ("+.", fp);
27427 fputs ("-(", fp);
27428 output_addr_const (fp, XVECEXP (x, 0, 0));
27429 fputc (')', fp);
27430 return TRUE;
27432 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27434 output_addr_const (fp, XVECEXP (x, 0, 0));
27435 if (GOT_PCREL)
27436 fputs ("+.", fp);
27437 fputs ("-(", fp);
27438 output_addr_const (fp, XVECEXP (x, 0, 1));
27439 fputc (')', fp);
27440 return TRUE;
27442 else if (GET_CODE (x) == CONST_VECTOR)
27443 return arm_emit_vector_const (fp, x);
27445 return FALSE;
27448 /* Output assembly for a shift instruction.
27449 SET_FLAGS determines how the instruction modifies the condition codes.
27450 0 - Do not set condition codes.
27451 1 - Set condition codes.
27452 2 - Use smallest instruction. */
27453 const char *
27454 arm_output_shift(rtx * operands, int set_flags)
27456 char pattern[100];
27457 static const char flag_chars[3] = {'?', '.', '!'};
27458 const char *shift;
27459 HOST_WIDE_INT val;
27460 char c;
27462 c = flag_chars[set_flags];
27463 shift = shift_op(operands[3], &val);
27464 if (shift)
27466 if (val != -1)
27467 operands[2] = GEN_INT(val);
27468 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27470 else
27471 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27473 output_asm_insn (pattern, operands);
27474 return "";
27477 /* Output assembly for a WMMX immediate shift instruction. */
27478 const char *
27479 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27481 int shift = INTVAL (operands[2]);
27482 char templ[50];
27483 machine_mode opmode = GET_MODE (operands[0]);
27485 gcc_assert (shift >= 0);
27487 /* If the shift value in the register versions is > 63 (for D qualifier),
27488 31 (for W qualifier) or 15 (for H qualifier). */
27489 if (((opmode == V4HImode) && (shift > 15))
27490 || ((opmode == V2SImode) && (shift > 31))
27491 || ((opmode == DImode) && (shift > 63)))
27493 if (wror_or_wsra)
27495 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27496 output_asm_insn (templ, operands);
27497 if (opmode == DImode)
27499 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27500 output_asm_insn (templ, operands);
27503 else
27505 /* The destination register will contain all zeros. */
27506 sprintf (templ, "wzero\t%%0");
27507 output_asm_insn (templ, operands);
27509 return "";
27512 if ((opmode == DImode) && (shift > 32))
27514 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27515 output_asm_insn (templ, operands);
27516 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27517 output_asm_insn (templ, operands);
27519 else
27521 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27522 output_asm_insn (templ, operands);
27524 return "";
27527 /* Output assembly for a WMMX tinsr instruction. */
27528 const char *
27529 arm_output_iwmmxt_tinsr (rtx *operands)
27531 int mask = INTVAL (operands[3]);
27532 int i;
27533 char templ[50];
27534 int units = mode_nunits[GET_MODE (operands[0])];
27535 gcc_assert ((mask & (mask - 1)) == 0);
27536 for (i = 0; i < units; ++i)
27538 if ((mask & 0x01) == 1)
27540 break;
27542 mask >>= 1;
27544 gcc_assert (i < units);
27546 switch (GET_MODE (operands[0]))
27548 case V8QImode:
27549 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27550 break;
27551 case V4HImode:
27552 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27553 break;
27554 case V2SImode:
27555 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27556 break;
27557 default:
27558 gcc_unreachable ();
27559 break;
27561 output_asm_insn (templ, operands);
27563 return "";
27566 /* Output a Thumb-1 casesi dispatch sequence. */
27567 const char *
27568 thumb1_output_casesi (rtx *operands)
27570 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27572 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27574 switch (GET_MODE(diff_vec))
27576 case QImode:
27577 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27578 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27579 case HImode:
27580 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27581 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27582 case SImode:
27583 return "bl\t%___gnu_thumb1_case_si";
27584 default:
27585 gcc_unreachable ();
27589 /* Output a Thumb-2 casesi instruction. */
27590 const char *
27591 thumb2_output_casesi (rtx *operands)
27593 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27595 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27597 output_asm_insn ("cmp\t%0, %1", operands);
27598 output_asm_insn ("bhi\t%l3", operands);
27599 switch (GET_MODE(diff_vec))
27601 case QImode:
27602 return "tbb\t[%|pc, %0]";
27603 case HImode:
27604 return "tbh\t[%|pc, %0, lsl #1]";
27605 case SImode:
27606 if (flag_pic)
27608 output_asm_insn ("adr\t%4, %l2", operands);
27609 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27610 output_asm_insn ("add\t%4, %4, %5", operands);
27611 return "bx\t%4";
27613 else
27615 output_asm_insn ("adr\t%4, %l2", operands);
27616 return "ldr\t%|pc, [%4, %0, lsl #2]";
27618 default:
27619 gcc_unreachable ();
27623 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27624 per-core tuning structs. */
27625 static int
27626 arm_issue_rate (void)
27628 return current_tune->issue_rate;
27631 /* Return how many instructions should scheduler lookahead to choose the
27632 best one. */
27633 static int
27634 arm_first_cycle_multipass_dfa_lookahead (void)
27636 int issue_rate = arm_issue_rate ();
27638 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27641 /* Enable modeling of L2 auto-prefetcher. */
27642 static int
27643 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27645 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27648 const char *
27649 arm_mangle_type (const_tree type)
27651 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27652 has to be managled as if it is in the "std" namespace. */
27653 if (TARGET_AAPCS_BASED
27654 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27655 return "St9__va_list";
27657 /* Half-precision float. */
27658 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27659 return "Dh";
27661 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27662 builtin type. */
27663 if (TYPE_NAME (type) != NULL)
27664 return arm_mangle_builtin_type (type);
27666 /* Use the default mangling. */
27667 return NULL;
27670 /* Order of allocation of core registers for Thumb: this allocation is
27671 written over the corresponding initial entries of the array
27672 initialized with REG_ALLOC_ORDER. We allocate all low registers
27673 first. Saving and restoring a low register is usually cheaper than
27674 using a call-clobbered high register. */
27676 static const int thumb_core_reg_alloc_order[] =
27678 3, 2, 1, 0, 4, 5, 6, 7,
27679 14, 12, 8, 9, 10, 11
27682 /* Adjust register allocation order when compiling for Thumb. */
27684 void
27685 arm_order_regs_for_local_alloc (void)
27687 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27688 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27689 if (TARGET_THUMB)
27690 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27691 sizeof (thumb_core_reg_alloc_order));
27694 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27696 bool
27697 arm_frame_pointer_required (void)
27699 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27700 return true;
27702 /* If the function receives nonlocal gotos, it needs to save the frame
27703 pointer in the nonlocal_goto_save_area object. */
27704 if (cfun->has_nonlocal_label)
27705 return true;
27707 /* The frame pointer is required for non-leaf APCS frames. */
27708 if (TARGET_ARM && TARGET_APCS_FRAME && !leaf_function_p ())
27709 return true;
27711 /* If we are probing the stack in the prologue, we will have a faulting
27712 instruction prior to the stack adjustment and this requires a frame
27713 pointer if we want to catch the exception using the EABI unwinder. */
27714 if (!IS_INTERRUPT (arm_current_func_type ())
27715 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27716 && arm_except_unwind_info (&global_options) == UI_TARGET
27717 && cfun->can_throw_non_call_exceptions)
27719 HOST_WIDE_INT size = get_frame_size ();
27721 /* That's irrelevant if there is no stack adjustment. */
27722 if (size <= 0)
27723 return false;
27725 /* That's relevant only if there is a stack probe. */
27726 if (crtl->is_leaf && !cfun->calls_alloca)
27728 /* We don't have the final size of the frame so adjust. */
27729 size += 32 * UNITS_PER_WORD;
27730 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27731 return true;
27733 else
27734 return true;
27737 return false;
27740 /* Only thumb1 can't support conditional execution, so return true if
27741 the target is not thumb1. */
27742 static bool
27743 arm_have_conditional_execution (void)
27745 return !TARGET_THUMB1;
27748 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27749 static HOST_WIDE_INT
27750 arm_vector_alignment (const_tree type)
27752 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27754 if (TARGET_AAPCS_BASED)
27755 align = MIN (align, 64);
27757 return align;
27760 static unsigned int
27761 arm_autovectorize_vector_sizes (void)
27763 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27766 static bool
27767 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27769 /* Vectors which aren't in packed structures will not be less aligned than
27770 the natural alignment of their element type, so this is safe. */
27771 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27772 return !is_packed;
27774 return default_builtin_vector_alignment_reachable (type, is_packed);
27777 static bool
27778 arm_builtin_support_vector_misalignment (machine_mode mode,
27779 const_tree type, int misalignment,
27780 bool is_packed)
27782 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27784 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27786 if (is_packed)
27787 return align == 1;
27789 /* If the misalignment is unknown, we should be able to handle the access
27790 so long as it is not to a member of a packed data structure. */
27791 if (misalignment == -1)
27792 return true;
27794 /* Return true if the misalignment is a multiple of the natural alignment
27795 of the vector's element type. This is probably always going to be
27796 true in practice, since we've already established that this isn't a
27797 packed access. */
27798 return ((misalignment % align) == 0);
27801 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27802 is_packed);
27805 static void
27806 arm_conditional_register_usage (void)
27808 int regno;
27810 if (TARGET_THUMB1 && optimize_size)
27812 /* When optimizing for size on Thumb-1, it's better not
27813 to use the HI regs, because of the overhead of
27814 stacking them. */
27815 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27816 fixed_regs[regno] = call_used_regs[regno] = 1;
27819 /* The link register can be clobbered by any branch insn,
27820 but we have no way to track that at present, so mark
27821 it as unavailable. */
27822 if (TARGET_THUMB1)
27823 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27825 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27827 /* VFPv3 registers are disabled when earlier VFP
27828 versions are selected due to the definition of
27829 LAST_VFP_REGNUM. */
27830 for (regno = FIRST_VFP_REGNUM;
27831 regno <= LAST_VFP_REGNUM; ++ regno)
27833 fixed_regs[regno] = 0;
27834 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27835 || regno >= FIRST_VFP_REGNUM + 32;
27839 if (TARGET_REALLY_IWMMXT)
27841 regno = FIRST_IWMMXT_GR_REGNUM;
27842 /* The 2002/10/09 revision of the XScale ABI has wCG0
27843 and wCG1 as call-preserved registers. The 2002/11/21
27844 revision changed this so that all wCG registers are
27845 scratch registers. */
27846 for (regno = FIRST_IWMMXT_GR_REGNUM;
27847 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27848 fixed_regs[regno] = 0;
27849 /* The XScale ABI has wR0 - wR9 as scratch registers,
27850 the rest as call-preserved registers. */
27851 for (regno = FIRST_IWMMXT_REGNUM;
27852 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27854 fixed_regs[regno] = 0;
27855 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27859 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27861 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27862 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27864 else if (TARGET_APCS_STACK)
27866 fixed_regs[10] = 1;
27867 call_used_regs[10] = 1;
27869 /* -mcaller-super-interworking reserves r11 for calls to
27870 _interwork_r11_call_via_rN(). Making the register global
27871 is an easy way of ensuring that it remains valid for all
27872 calls. */
27873 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27874 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27876 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27877 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27878 if (TARGET_CALLER_INTERWORKING)
27879 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27881 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27884 static reg_class_t
27885 arm_preferred_rename_class (reg_class_t rclass)
27887 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27888 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
27889 and code size can be reduced. */
27890 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27891 return LO_REGS;
27892 else
27893 return NO_REGS;
27896 /* Compute the attribute "length" of insn "*push_multi".
27897 So this function MUST be kept in sync with that insn pattern. */
27899 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27901 int i, regno, hi_reg;
27902 int num_saves = XVECLEN (parallel_op, 0);
27904 /* ARM mode. */
27905 if (TARGET_ARM)
27906 return 4;
27907 /* Thumb1 mode. */
27908 if (TARGET_THUMB1)
27909 return 2;
27911 /* Thumb2 mode. */
27912 regno = REGNO (first_op);
27913 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
27914 list is 8-bit. Normally this means all registers in the list must be
27915 LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit
27916 encodings. There is one exception for PUSH that LR in HI_REGS can be used
27917 with 16-bit encoding. */
27918 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27919 for (i = 1; i < num_saves && !hi_reg; i++)
27921 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27922 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27925 if (!hi_reg)
27926 return 2;
27927 return 4;
27930 /* Compute the attribute "length" of insn. Currently, this function is used
27931 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
27932 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
27933 rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is
27934 true if OPERANDS contains insn which explicit updates base register. */
27937 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
27939 /* ARM mode. */
27940 if (TARGET_ARM)
27941 return 4;
27942 /* Thumb1 mode. */
27943 if (TARGET_THUMB1)
27944 return 2;
27946 rtx parallel_op = operands[0];
27947 /* Initialize to elements number of PARALLEL. */
27948 unsigned indx = XVECLEN (parallel_op, 0) - 1;
27949 /* Initialize the value to base register. */
27950 unsigned regno = REGNO (operands[1]);
27951 /* Skip return and write back pattern.
27952 We only need register pop pattern for later analysis. */
27953 unsigned first_indx = 0;
27954 first_indx += return_pc ? 1 : 0;
27955 first_indx += write_back_p ? 1 : 0;
27957 /* A pop operation can be done through LDM or POP. If the base register is SP
27958 and if it's with write back, then a LDM will be alias of POP. */
27959 bool pop_p = (regno == SP_REGNUM && write_back_p);
27960 bool ldm_p = !pop_p;
27962 /* Check base register for LDM. */
27963 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
27964 return 4;
27966 /* Check each register in the list. */
27967 for (; indx >= first_indx; indx--)
27969 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
27970 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
27971 comment in arm_attr_length_push_multi. */
27972 if (REGNO_REG_CLASS (regno) == HI_REGS
27973 && (regno != PC_REGNUM || ldm_p))
27974 return 4;
27977 return 2;
27980 /* Compute the number of instructions emitted by output_move_double. */
27982 arm_count_output_move_double_insns (rtx *operands)
27984 int count;
27985 rtx ops[2];
27986 /* output_move_double may modify the operands array, so call it
27987 here on a copy of the array. */
27988 ops[0] = operands[0];
27989 ops[1] = operands[1];
27990 output_move_double (ops, false, &count);
27991 return count;
27995 vfp3_const_double_for_fract_bits (rtx operand)
27997 REAL_VALUE_TYPE r0;
27999 if (!CONST_DOUBLE_P (operand))
28000 return 0;
28002 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28003 if (exact_real_inverse (DFmode, &r0)
28004 && !REAL_VALUE_NEGATIVE (r0))
28006 if (exact_real_truncate (DFmode, &r0))
28008 HOST_WIDE_INT value = real_to_integer (&r0);
28009 value = value & 0xffffffff;
28010 if ((value != 0) && ( (value & (value - 1)) == 0))
28012 int ret = exact_log2 (value);
28013 gcc_assert (IN_RANGE (ret, 0, 31));
28014 return ret;
28018 return 0;
28021 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28022 log2 is in [1, 32], return that log2. Otherwise return -1.
28023 This is used in the patterns for vcvt.s32.f32 floating-point to
28024 fixed-point conversions. */
28027 vfp3_const_double_for_bits (rtx x)
28029 const REAL_VALUE_TYPE *r;
28031 if (!CONST_DOUBLE_P (x))
28032 return -1;
28034 r = CONST_DOUBLE_REAL_VALUE (x);
28036 if (REAL_VALUE_NEGATIVE (*r)
28037 || REAL_VALUE_ISNAN (*r)
28038 || REAL_VALUE_ISINF (*r)
28039 || !real_isinteger (r, SFmode))
28040 return -1;
28042 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28044 /* The exact_log2 above will have returned -1 if this is
28045 not an exact log2. */
28046 if (!IN_RANGE (hwint, 1, 32))
28047 return -1;
28049 return hwint;
28053 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28055 static void
28056 arm_pre_atomic_barrier (enum memmodel model)
28058 if (need_atomic_barrier_p (model, true))
28059 emit_insn (gen_memory_barrier ());
28062 static void
28063 arm_post_atomic_barrier (enum memmodel model)
28065 if (need_atomic_barrier_p (model, false))
28066 emit_insn (gen_memory_barrier ());
28069 /* Emit the load-exclusive and store-exclusive instructions.
28070 Use acquire and release versions if necessary. */
28072 static void
28073 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28075 rtx (*gen) (rtx, rtx);
28077 if (acq)
28079 switch (mode)
28081 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28082 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28083 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28084 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28085 default:
28086 gcc_unreachable ();
28089 else
28091 switch (mode)
28093 case QImode: gen = gen_arm_load_exclusiveqi; break;
28094 case HImode: gen = gen_arm_load_exclusivehi; break;
28095 case SImode: gen = gen_arm_load_exclusivesi; break;
28096 case DImode: gen = gen_arm_load_exclusivedi; break;
28097 default:
28098 gcc_unreachable ();
28102 emit_insn (gen (rval, mem));
28105 static void
28106 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28107 rtx mem, bool rel)
28109 rtx (*gen) (rtx, rtx, rtx);
28111 if (rel)
28113 switch (mode)
28115 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
28116 case HImode: gen = gen_arm_store_release_exclusivehi; break;
28117 case SImode: gen = gen_arm_store_release_exclusivesi; break;
28118 case DImode: gen = gen_arm_store_release_exclusivedi; break;
28119 default:
28120 gcc_unreachable ();
28123 else
28125 switch (mode)
28127 case QImode: gen = gen_arm_store_exclusiveqi; break;
28128 case HImode: gen = gen_arm_store_exclusivehi; break;
28129 case SImode: gen = gen_arm_store_exclusivesi; break;
28130 case DImode: gen = gen_arm_store_exclusivedi; break;
28131 default:
28132 gcc_unreachable ();
28136 emit_insn (gen (bval, rval, mem));
28139 /* Mark the previous jump instruction as unlikely. */
28141 static void
28142 emit_unlikely_jump (rtx insn)
28144 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
28146 insn = emit_jump_insn (insn);
28147 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
28150 /* Expand a compare and swap pattern. */
28152 void
28153 arm_expand_compare_and_swap (rtx operands[])
28155 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28156 machine_mode mode;
28157 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28159 bval = operands[0];
28160 rval = operands[1];
28161 mem = operands[2];
28162 oldval = operands[3];
28163 newval = operands[4];
28164 is_weak = operands[5];
28165 mod_s = operands[6];
28166 mod_f = operands[7];
28167 mode = GET_MODE (mem);
28169 /* Normally the succ memory model must be stronger than fail, but in the
28170 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28171 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28173 if (TARGET_HAVE_LDACQ
28174 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28175 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28176 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28178 switch (mode)
28180 case QImode:
28181 case HImode:
28182 /* For narrow modes, we're going to perform the comparison in SImode,
28183 so do the zero-extension now. */
28184 rval = gen_reg_rtx (SImode);
28185 oldval = convert_modes (SImode, mode, oldval, true);
28186 /* FALLTHRU */
28188 case SImode:
28189 /* Force the value into a register if needed. We waited until after
28190 the zero-extension above to do this properly. */
28191 if (!arm_add_operand (oldval, SImode))
28192 oldval = force_reg (SImode, oldval);
28193 break;
28195 case DImode:
28196 if (!cmpdi_operand (oldval, mode))
28197 oldval = force_reg (mode, oldval);
28198 break;
28200 default:
28201 gcc_unreachable ();
28204 switch (mode)
28206 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
28207 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
28208 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
28209 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
28210 default:
28211 gcc_unreachable ();
28214 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28216 if (mode == QImode || mode == HImode)
28217 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28219 /* In all cases, we arrange for success to be signaled by Z set.
28220 This arrangement allows for the boolean result to be used directly
28221 in a subsequent branch, post optimization. */
28222 x = gen_rtx_REG (CCmode, CC_REGNUM);
28223 x = gen_rtx_EQ (SImode, x, const0_rtx);
28224 emit_insn (gen_rtx_SET (bval, x));
28227 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28228 another memory store between the load-exclusive and store-exclusive can
28229 reset the monitor from Exclusive to Open state. This means we must wait
28230 until after reload to split the pattern, lest we get a register spill in
28231 the middle of the atomic sequence. */
28233 void
28234 arm_split_compare_and_swap (rtx operands[])
28236 rtx rval, mem, oldval, newval, scratch;
28237 machine_mode mode;
28238 enum memmodel mod_s, mod_f;
28239 bool is_weak;
28240 rtx_code_label *label1, *label2;
28241 rtx x, cond;
28243 rval = operands[0];
28244 mem = operands[1];
28245 oldval = operands[2];
28246 newval = operands[3];
28247 is_weak = (operands[4] != const0_rtx);
28248 mod_s = memmodel_from_int (INTVAL (operands[5]));
28249 mod_f = memmodel_from_int (INTVAL (operands[6]));
28250 scratch = operands[7];
28251 mode = GET_MODE (mem);
28253 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28255 bool use_acquire = TARGET_HAVE_LDACQ
28256 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28257 || is_mm_release (mod_s));
28259 bool use_release = TARGET_HAVE_LDACQ
28260 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28261 || is_mm_acquire (mod_s));
28263 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28264 a full barrier is emitted after the store-release. */
28265 if (is_armv8_sync)
28266 use_acquire = false;
28268 /* Checks whether a barrier is needed and emits one accordingly. */
28269 if (!(use_acquire || use_release))
28270 arm_pre_atomic_barrier (mod_s);
28272 label1 = NULL;
28273 if (!is_weak)
28275 label1 = gen_label_rtx ();
28276 emit_label (label1);
28278 label2 = gen_label_rtx ();
28280 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28282 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
28283 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28284 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28285 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28286 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28288 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
28290 /* Weak or strong, we want EQ to be true for success, so that we
28291 match the flags that we got from the compare above. */
28292 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28293 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
28294 emit_insn (gen_rtx_SET (cond, x));
28296 if (!is_weak)
28298 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28299 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28300 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
28301 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28304 if (!is_mm_relaxed (mod_f))
28305 emit_label (label2);
28307 /* Checks whether a barrier is needed and emits one accordingly. */
28308 if (is_armv8_sync
28309 || !(use_acquire || use_release))
28310 arm_post_atomic_barrier (mod_s);
28312 if (is_mm_relaxed (mod_f))
28313 emit_label (label2);
28316 void
28317 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28318 rtx value, rtx model_rtx, rtx cond)
28320 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28321 machine_mode mode = GET_MODE (mem);
28322 machine_mode wmode = (mode == DImode ? DImode : SImode);
28323 rtx_code_label *label;
28324 rtx x;
28326 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28328 bool use_acquire = TARGET_HAVE_LDACQ
28329 && !(is_mm_relaxed (model) || is_mm_consume (model)
28330 || is_mm_release (model));
28332 bool use_release = TARGET_HAVE_LDACQ
28333 && !(is_mm_relaxed (model) || is_mm_consume (model)
28334 || is_mm_acquire (model));
28336 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28337 a full barrier is emitted after the store-release. */
28338 if (is_armv8_sync)
28339 use_acquire = false;
28341 /* Checks whether a barrier is needed and emits one accordingly. */
28342 if (!(use_acquire || use_release))
28343 arm_pre_atomic_barrier (model);
28345 label = gen_label_rtx ();
28346 emit_label (label);
28348 if (new_out)
28349 new_out = gen_lowpart (wmode, new_out);
28350 if (old_out)
28351 old_out = gen_lowpart (wmode, old_out);
28352 else
28353 old_out = new_out;
28354 value = simplify_gen_subreg (wmode, value, mode, 0);
28356 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28358 switch (code)
28360 case SET:
28361 new_out = value;
28362 break;
28364 case NOT:
28365 x = gen_rtx_AND (wmode, old_out, value);
28366 emit_insn (gen_rtx_SET (new_out, x));
28367 x = gen_rtx_NOT (wmode, new_out);
28368 emit_insn (gen_rtx_SET (new_out, x));
28369 break;
28371 case MINUS:
28372 if (CONST_INT_P (value))
28374 value = GEN_INT (-INTVAL (value));
28375 code = PLUS;
28377 /* FALLTHRU */
28379 case PLUS:
28380 if (mode == DImode)
28382 /* DImode plus/minus need to clobber flags. */
28383 /* The adddi3 and subdi3 patterns are incorrectly written so that
28384 they require matching operands, even when we could easily support
28385 three operands. Thankfully, this can be fixed up post-splitting,
28386 as the individual add+adc patterns do accept three operands and
28387 post-reload cprop can make these moves go away. */
28388 emit_move_insn (new_out, old_out);
28389 if (code == PLUS)
28390 x = gen_adddi3 (new_out, new_out, value);
28391 else
28392 x = gen_subdi3 (new_out, new_out, value);
28393 emit_insn (x);
28394 break;
28396 /* FALLTHRU */
28398 default:
28399 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28400 emit_insn (gen_rtx_SET (new_out, x));
28401 break;
28404 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28405 use_release);
28407 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28408 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28410 /* Checks whether a barrier is needed and emits one accordingly. */
28411 if (is_armv8_sync
28412 || !(use_acquire || use_release))
28413 arm_post_atomic_barrier (model);
28416 #define MAX_VECT_LEN 16
28418 struct expand_vec_perm_d
28420 rtx target, op0, op1;
28421 unsigned char perm[MAX_VECT_LEN];
28422 machine_mode vmode;
28423 unsigned char nelt;
28424 bool one_vector_p;
28425 bool testing_p;
28428 /* Generate a variable permutation. */
28430 static void
28431 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28433 machine_mode vmode = GET_MODE (target);
28434 bool one_vector_p = rtx_equal_p (op0, op1);
28436 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28437 gcc_checking_assert (GET_MODE (op0) == vmode);
28438 gcc_checking_assert (GET_MODE (op1) == vmode);
28439 gcc_checking_assert (GET_MODE (sel) == vmode);
28440 gcc_checking_assert (TARGET_NEON);
28442 if (one_vector_p)
28444 if (vmode == V8QImode)
28445 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28446 else
28447 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28449 else
28451 rtx pair;
28453 if (vmode == V8QImode)
28455 pair = gen_reg_rtx (V16QImode);
28456 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28457 pair = gen_lowpart (TImode, pair);
28458 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28460 else
28462 pair = gen_reg_rtx (OImode);
28463 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28464 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28469 void
28470 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28472 machine_mode vmode = GET_MODE (target);
28473 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28474 bool one_vector_p = rtx_equal_p (op0, op1);
28475 rtx rmask[MAX_VECT_LEN], mask;
28477 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28478 numbering of elements for big-endian, we must reverse the order. */
28479 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28481 /* The VTBL instruction does not use a modulo index, so we must take care
28482 of that ourselves. */
28483 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28484 for (i = 0; i < nelt; ++i)
28485 rmask[i] = mask;
28486 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28487 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28489 arm_expand_vec_perm_1 (target, op0, op1, sel);
28492 /* Map lane ordering between architectural lane order, and GCC lane order,
28493 taking into account ABI. See comment above output_move_neon for details. */
28495 static int
28496 neon_endian_lane_map (machine_mode mode, int lane)
28498 if (BYTES_BIG_ENDIAN)
28500 int nelems = GET_MODE_NUNITS (mode);
28501 /* Reverse lane order. */
28502 lane = (nelems - 1 - lane);
28503 /* Reverse D register order, to match ABI. */
28504 if (GET_MODE_SIZE (mode) == 16)
28505 lane = lane ^ (nelems / 2);
28507 return lane;
28510 /* Some permutations index into pairs of vectors, this is a helper function
28511 to map indexes into those pairs of vectors. */
28513 static int
28514 neon_pair_endian_lane_map (machine_mode mode, int lane)
28516 int nelem = GET_MODE_NUNITS (mode);
28517 if (BYTES_BIG_ENDIAN)
28518 lane =
28519 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28520 return lane;
28523 /* Generate or test for an insn that supports a constant permutation. */
28525 /* Recognize patterns for the VUZP insns. */
28527 static bool
28528 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28530 unsigned int i, odd, mask, nelt = d->nelt;
28531 rtx out0, out1, in0, in1;
28532 rtx (*gen)(rtx, rtx, rtx, rtx);
28533 int first_elem;
28534 int swap_nelt;
28536 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28537 return false;
28539 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28540 big endian pattern on 64 bit vectors, so we correct for that. */
28541 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28542 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28544 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28546 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28547 odd = 0;
28548 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28549 odd = 1;
28550 else
28551 return false;
28552 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28554 for (i = 0; i < nelt; i++)
28556 unsigned elt =
28557 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28558 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28559 return false;
28562 /* Success! */
28563 if (d->testing_p)
28564 return true;
28566 switch (d->vmode)
28568 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28569 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28570 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28571 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28572 case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28573 case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28574 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28575 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28576 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28577 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28578 default:
28579 gcc_unreachable ();
28582 in0 = d->op0;
28583 in1 = d->op1;
28584 if (swap_nelt != 0)
28585 std::swap (in0, in1);
28587 out0 = d->target;
28588 out1 = gen_reg_rtx (d->vmode);
28589 if (odd)
28590 std::swap (out0, out1);
28592 emit_insn (gen (out0, in0, in1, out1));
28593 return true;
28596 /* Recognize patterns for the VZIP insns. */
28598 static bool
28599 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28601 unsigned int i, high, mask, nelt = d->nelt;
28602 rtx out0, out1, in0, in1;
28603 rtx (*gen)(rtx, rtx, rtx, rtx);
28604 int first_elem;
28605 bool is_swapped;
28607 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28608 return false;
28610 is_swapped = BYTES_BIG_ENDIAN;
28612 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28614 high = nelt / 2;
28615 if (first_elem == neon_endian_lane_map (d->vmode, high))
28617 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28618 high = 0;
28619 else
28620 return false;
28621 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28623 for (i = 0; i < nelt / 2; i++)
28625 unsigned elt =
28626 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28627 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28628 != elt)
28629 return false;
28630 elt =
28631 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28632 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28633 != elt)
28634 return false;
28637 /* Success! */
28638 if (d->testing_p)
28639 return true;
28641 switch (d->vmode)
28643 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28644 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28645 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28646 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28647 case V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28648 case V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28649 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28650 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28651 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28652 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28653 default:
28654 gcc_unreachable ();
28657 in0 = d->op0;
28658 in1 = d->op1;
28659 if (is_swapped)
28660 std::swap (in0, in1);
28662 out0 = d->target;
28663 out1 = gen_reg_rtx (d->vmode);
28664 if (high)
28665 std::swap (out0, out1);
28667 emit_insn (gen (out0, in0, in1, out1));
28668 return true;
28671 /* Recognize patterns for the VREV insns. */
28673 static bool
28674 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28676 unsigned int i, j, diff, nelt = d->nelt;
28677 rtx (*gen)(rtx, rtx);
28679 if (!d->one_vector_p)
28680 return false;
28682 diff = d->perm[0];
28683 switch (diff)
28685 case 7:
28686 switch (d->vmode)
28688 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28689 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28690 default:
28691 return false;
28693 break;
28694 case 3:
28695 switch (d->vmode)
28697 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28698 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28699 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28700 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28701 case V8HFmode: gen = gen_neon_vrev64v8hf; break;
28702 case V4HFmode: gen = gen_neon_vrev64v4hf; break;
28703 default:
28704 return false;
28706 break;
28707 case 1:
28708 switch (d->vmode)
28710 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28711 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28712 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28713 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28714 case V4SImode: gen = gen_neon_vrev64v4si; break;
28715 case V2SImode: gen = gen_neon_vrev64v2si; break;
28716 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28717 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28718 default:
28719 return false;
28721 break;
28722 default:
28723 return false;
28726 for (i = 0; i < nelt ; i += diff + 1)
28727 for (j = 0; j <= diff; j += 1)
28729 /* This is guaranteed to be true as the value of diff
28730 is 7, 3, 1 and we should have enough elements in the
28731 queue to generate this. Getting a vector mask with a
28732 value of diff other than these values implies that
28733 something is wrong by the time we get here. */
28734 gcc_assert (i + j < nelt);
28735 if (d->perm[i + j] != i + diff - j)
28736 return false;
28739 /* Success! */
28740 if (d->testing_p)
28741 return true;
28743 emit_insn (gen (d->target, d->op0));
28744 return true;
28747 /* Recognize patterns for the VTRN insns. */
28749 static bool
28750 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28752 unsigned int i, odd, mask, nelt = d->nelt;
28753 rtx out0, out1, in0, in1;
28754 rtx (*gen)(rtx, rtx, rtx, rtx);
28756 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28757 return false;
28759 /* Note that these are little-endian tests. Adjust for big-endian later. */
28760 if (d->perm[0] == 0)
28761 odd = 0;
28762 else if (d->perm[0] == 1)
28763 odd = 1;
28764 else
28765 return false;
28766 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28768 for (i = 0; i < nelt; i += 2)
28770 if (d->perm[i] != i + odd)
28771 return false;
28772 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28773 return false;
28776 /* Success! */
28777 if (d->testing_p)
28778 return true;
28780 switch (d->vmode)
28782 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28783 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28784 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28785 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28786 case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
28787 case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
28788 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28789 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28790 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28791 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28792 default:
28793 gcc_unreachable ();
28796 in0 = d->op0;
28797 in1 = d->op1;
28798 if (BYTES_BIG_ENDIAN)
28800 std::swap (in0, in1);
28801 odd = !odd;
28804 out0 = d->target;
28805 out1 = gen_reg_rtx (d->vmode);
28806 if (odd)
28807 std::swap (out0, out1);
28809 emit_insn (gen (out0, in0, in1, out1));
28810 return true;
28813 /* Recognize patterns for the VEXT insns. */
28815 static bool
28816 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28818 unsigned int i, nelt = d->nelt;
28819 rtx (*gen) (rtx, rtx, rtx, rtx);
28820 rtx offset;
28822 unsigned int location;
28824 unsigned int next = d->perm[0] + 1;
28826 /* TODO: Handle GCC's numbering of elements for big-endian. */
28827 if (BYTES_BIG_ENDIAN)
28828 return false;
28830 /* Check if the extracted indexes are increasing by one. */
28831 for (i = 1; i < nelt; next++, i++)
28833 /* If we hit the most significant element of the 2nd vector in
28834 the previous iteration, no need to test further. */
28835 if (next == 2 * nelt)
28836 return false;
28838 /* If we are operating on only one vector: it could be a
28839 rotation. If there are only two elements of size < 64, let
28840 arm_evpc_neon_vrev catch it. */
28841 if (d->one_vector_p && (next == nelt))
28843 if ((nelt == 2) && (d->vmode != V2DImode))
28844 return false;
28845 else
28846 next = 0;
28849 if (d->perm[i] != next)
28850 return false;
28853 location = d->perm[0];
28855 switch (d->vmode)
28857 case V16QImode: gen = gen_neon_vextv16qi; break;
28858 case V8QImode: gen = gen_neon_vextv8qi; break;
28859 case V4HImode: gen = gen_neon_vextv4hi; break;
28860 case V8HImode: gen = gen_neon_vextv8hi; break;
28861 case V2SImode: gen = gen_neon_vextv2si; break;
28862 case V4SImode: gen = gen_neon_vextv4si; break;
28863 case V4HFmode: gen = gen_neon_vextv4hf; break;
28864 case V8HFmode: gen = gen_neon_vextv8hf; break;
28865 case V2SFmode: gen = gen_neon_vextv2sf; break;
28866 case V4SFmode: gen = gen_neon_vextv4sf; break;
28867 case V2DImode: gen = gen_neon_vextv2di; break;
28868 default:
28869 return false;
28872 /* Success! */
28873 if (d->testing_p)
28874 return true;
28876 offset = GEN_INT (location);
28877 emit_insn (gen (d->target, d->op0, d->op1, offset));
28878 return true;
28881 /* The NEON VTBL instruction is a fully variable permuation that's even
28882 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28883 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28884 can do slightly better by expanding this as a constant where we don't
28885 have to apply a mask. */
28887 static bool
28888 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28890 rtx rperm[MAX_VECT_LEN], sel;
28891 machine_mode vmode = d->vmode;
28892 unsigned int i, nelt = d->nelt;
28894 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28895 numbering of elements for big-endian, we must reverse the order. */
28896 if (BYTES_BIG_ENDIAN)
28897 return false;
28899 if (d->testing_p)
28900 return true;
28902 /* Generic code will try constant permutation twice. Once with the
28903 original mode and again with the elements lowered to QImode.
28904 So wait and don't do the selector expansion ourselves. */
28905 if (vmode != V8QImode && vmode != V16QImode)
28906 return false;
28908 for (i = 0; i < nelt; ++i)
28909 rperm[i] = GEN_INT (d->perm[i]);
28910 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28911 sel = force_reg (vmode, sel);
28913 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28914 return true;
28917 static bool
28918 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28920 /* Check if the input mask matches vext before reordering the
28921 operands. */
28922 if (TARGET_NEON)
28923 if (arm_evpc_neon_vext (d))
28924 return true;
28926 /* The pattern matching functions above are written to look for a small
28927 number to begin the sequence (0, 1, N/2). If we begin with an index
28928 from the second operand, we can swap the operands. */
28929 if (d->perm[0] >= d->nelt)
28931 unsigned i, nelt = d->nelt;
28933 for (i = 0; i < nelt; ++i)
28934 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28936 std::swap (d->op0, d->op1);
28939 if (TARGET_NEON)
28941 if (arm_evpc_neon_vuzp (d))
28942 return true;
28943 if (arm_evpc_neon_vzip (d))
28944 return true;
28945 if (arm_evpc_neon_vrev (d))
28946 return true;
28947 if (arm_evpc_neon_vtrn (d))
28948 return true;
28949 return arm_evpc_neon_vtbl (d);
28951 return false;
28954 /* Expand a vec_perm_const pattern. */
28956 bool
28957 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28959 struct expand_vec_perm_d d;
28960 int i, nelt, which;
28962 d.target = target;
28963 d.op0 = op0;
28964 d.op1 = op1;
28966 d.vmode = GET_MODE (target);
28967 gcc_assert (VECTOR_MODE_P (d.vmode));
28968 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28969 d.testing_p = false;
28971 for (i = which = 0; i < nelt; ++i)
28973 rtx e = XVECEXP (sel, 0, i);
28974 int ei = INTVAL (e) & (2 * nelt - 1);
28975 which |= (ei < nelt ? 1 : 2);
28976 d.perm[i] = ei;
28979 switch (which)
28981 default:
28982 gcc_unreachable();
28984 case 3:
28985 d.one_vector_p = false;
28986 if (!rtx_equal_p (op0, op1))
28987 break;
28989 /* The elements of PERM do not suggest that only the first operand
28990 is used, but both operands are identical. Allow easier matching
28991 of the permutation by folding the permutation into the single
28992 input vector. */
28993 /* FALLTHRU */
28994 case 2:
28995 for (i = 0; i < nelt; ++i)
28996 d.perm[i] &= nelt - 1;
28997 d.op0 = op1;
28998 d.one_vector_p = true;
28999 break;
29001 case 1:
29002 d.op1 = op0;
29003 d.one_vector_p = true;
29004 break;
29007 return arm_expand_vec_perm_const_1 (&d);
29010 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29012 static bool
29013 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29014 const unsigned char *sel)
29016 struct expand_vec_perm_d d;
29017 unsigned int i, nelt, which;
29018 bool ret;
29020 d.vmode = vmode;
29021 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29022 d.testing_p = true;
29023 memcpy (d.perm, sel, nelt);
29025 /* Categorize the set of elements in the selector. */
29026 for (i = which = 0; i < nelt; ++i)
29028 unsigned char e = d.perm[i];
29029 gcc_assert (e < 2 * nelt);
29030 which |= (e < nelt ? 1 : 2);
29033 /* For all elements from second vector, fold the elements to first. */
29034 if (which == 2)
29035 for (i = 0; i < nelt; ++i)
29036 d.perm[i] -= nelt;
29038 /* Check whether the mask can be applied to the vector type. */
29039 d.one_vector_p = (which != 3);
29041 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29042 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29043 if (!d.one_vector_p)
29044 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29046 start_sequence ();
29047 ret = arm_expand_vec_perm_const_1 (&d);
29048 end_sequence ();
29050 return ret;
29053 bool
29054 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29056 /* If we are soft float and we do not have ldrd
29057 then all auto increment forms are ok. */
29058 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29059 return true;
29061 switch (code)
29063 /* Post increment and Pre Decrement are supported for all
29064 instruction forms except for vector forms. */
29065 case ARM_POST_INC:
29066 case ARM_PRE_DEC:
29067 if (VECTOR_MODE_P (mode))
29069 if (code != ARM_PRE_DEC)
29070 return true;
29071 else
29072 return false;
29075 return true;
29077 case ARM_POST_DEC:
29078 case ARM_PRE_INC:
29079 /* Without LDRD and mode size greater than
29080 word size, there is no point in auto-incrementing
29081 because ldm and stm will not have these forms. */
29082 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29083 return false;
29085 /* Vector and floating point modes do not support
29086 these auto increment forms. */
29087 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29088 return false;
29090 return true;
29092 default:
29093 return false;
29097 return false;
29100 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29101 on ARM, since we know that shifts by negative amounts are no-ops.
29102 Additionally, the default expansion code is not available or suitable
29103 for post-reload insn splits (this can occur when the register allocator
29104 chooses not to do a shift in NEON).
29106 This function is used in both initial expand and post-reload splits, and
29107 handles all kinds of 64-bit shifts.
29109 Input requirements:
29110 - It is safe for the input and output to be the same register, but
29111 early-clobber rules apply for the shift amount and scratch registers.
29112 - Shift by register requires both scratch registers. In all other cases
29113 the scratch registers may be NULL.
29114 - Ashiftrt by a register also clobbers the CC register. */
29115 void
29116 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29117 rtx amount, rtx scratch1, rtx scratch2)
29119 rtx out_high = gen_highpart (SImode, out);
29120 rtx out_low = gen_lowpart (SImode, out);
29121 rtx in_high = gen_highpart (SImode, in);
29122 rtx in_low = gen_lowpart (SImode, in);
29124 /* Terminology:
29125 in = the register pair containing the input value.
29126 out = the destination register pair.
29127 up = the high- or low-part of each pair.
29128 down = the opposite part to "up".
29129 In a shift, we can consider bits to shift from "up"-stream to
29130 "down"-stream, so in a left-shift "up" is the low-part and "down"
29131 is the high-part of each register pair. */
29133 rtx out_up = code == ASHIFT ? out_low : out_high;
29134 rtx out_down = code == ASHIFT ? out_high : out_low;
29135 rtx in_up = code == ASHIFT ? in_low : in_high;
29136 rtx in_down = code == ASHIFT ? in_high : in_low;
29138 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29139 gcc_assert (out
29140 && (REG_P (out) || GET_CODE (out) == SUBREG)
29141 && GET_MODE (out) == DImode);
29142 gcc_assert (in
29143 && (REG_P (in) || GET_CODE (in) == SUBREG)
29144 && GET_MODE (in) == DImode);
29145 gcc_assert (amount
29146 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29147 && GET_MODE (amount) == SImode)
29148 || CONST_INT_P (amount)));
29149 gcc_assert (scratch1 == NULL
29150 || (GET_CODE (scratch1) == SCRATCH)
29151 || (GET_MODE (scratch1) == SImode
29152 && REG_P (scratch1)));
29153 gcc_assert (scratch2 == NULL
29154 || (GET_CODE (scratch2) == SCRATCH)
29155 || (GET_MODE (scratch2) == SImode
29156 && REG_P (scratch2)));
29157 gcc_assert (!REG_P (out) || !REG_P (amount)
29158 || !HARD_REGISTER_P (out)
29159 || (REGNO (out) != REGNO (amount)
29160 && REGNO (out) + 1 != REGNO (amount)));
29162 /* Macros to make following code more readable. */
29163 #define SUB_32(DEST,SRC) \
29164 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29165 #define RSB_32(DEST,SRC) \
29166 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29167 #define SUB_S_32(DEST,SRC) \
29168 gen_addsi3_compare0 ((DEST), (SRC), \
29169 GEN_INT (-32))
29170 #define SET(DEST,SRC) \
29171 gen_rtx_SET ((DEST), (SRC))
29172 #define SHIFT(CODE,SRC,AMOUNT) \
29173 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29174 #define LSHIFT(CODE,SRC,AMOUNT) \
29175 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29176 SImode, (SRC), (AMOUNT))
29177 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29178 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29179 SImode, (SRC), (AMOUNT))
29180 #define ORR(A,B) \
29181 gen_rtx_IOR (SImode, (A), (B))
29182 #define BRANCH(COND,LABEL) \
29183 gen_arm_cond_branch ((LABEL), \
29184 gen_rtx_ ## COND (CCmode, cc_reg, \
29185 const0_rtx), \
29186 cc_reg)
29188 /* Shifts by register and shifts by constant are handled separately. */
29189 if (CONST_INT_P (amount))
29191 /* We have a shift-by-constant. */
29193 /* First, handle out-of-range shift amounts.
29194 In both cases we try to match the result an ARM instruction in a
29195 shift-by-register would give. This helps reduce execution
29196 differences between optimization levels, but it won't stop other
29197 parts of the compiler doing different things. This is "undefined
29198 behavior, in any case. */
29199 if (INTVAL (amount) <= 0)
29200 emit_insn (gen_movdi (out, in));
29201 else if (INTVAL (amount) >= 64)
29203 if (code == ASHIFTRT)
29205 rtx const31_rtx = GEN_INT (31);
29206 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29207 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29209 else
29210 emit_insn (gen_movdi (out, const0_rtx));
29213 /* Now handle valid shifts. */
29214 else if (INTVAL (amount) < 32)
29216 /* Shifts by a constant less than 32. */
29217 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29219 /* Clearing the out register in DImode first avoids lots
29220 of spilling and results in less stack usage.
29221 Later this redundant insn is completely removed.
29222 Do that only if "in" and "out" are different registers. */
29223 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29224 emit_insn (SET (out, const0_rtx));
29225 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29226 emit_insn (SET (out_down,
29227 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29228 out_down)));
29229 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29231 else
29233 /* Shifts by a constant greater than 31. */
29234 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29236 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29237 emit_insn (SET (out, const0_rtx));
29238 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29239 if (code == ASHIFTRT)
29240 emit_insn (gen_ashrsi3 (out_up, in_up,
29241 GEN_INT (31)));
29242 else
29243 emit_insn (SET (out_up, const0_rtx));
29246 else
29248 /* We have a shift-by-register. */
29249 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29251 /* This alternative requires the scratch registers. */
29252 gcc_assert (scratch1 && REG_P (scratch1));
29253 gcc_assert (scratch2 && REG_P (scratch2));
29255 /* We will need the values "amount-32" and "32-amount" later.
29256 Swapping them around now allows the later code to be more general. */
29257 switch (code)
29259 case ASHIFT:
29260 emit_insn (SUB_32 (scratch1, amount));
29261 emit_insn (RSB_32 (scratch2, amount));
29262 break;
29263 case ASHIFTRT:
29264 emit_insn (RSB_32 (scratch1, amount));
29265 /* Also set CC = amount > 32. */
29266 emit_insn (SUB_S_32 (scratch2, amount));
29267 break;
29268 case LSHIFTRT:
29269 emit_insn (RSB_32 (scratch1, amount));
29270 emit_insn (SUB_32 (scratch2, amount));
29271 break;
29272 default:
29273 gcc_unreachable ();
29276 /* Emit code like this:
29278 arithmetic-left:
29279 out_down = in_down << amount;
29280 out_down = (in_up << (amount - 32)) | out_down;
29281 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29282 out_up = in_up << amount;
29284 arithmetic-right:
29285 out_down = in_down >> amount;
29286 out_down = (in_up << (32 - amount)) | out_down;
29287 if (amount < 32)
29288 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29289 out_up = in_up << amount;
29291 logical-right:
29292 out_down = in_down >> amount;
29293 out_down = (in_up << (32 - amount)) | out_down;
29294 if (amount < 32)
29295 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29296 out_up = in_up << amount;
29298 The ARM and Thumb2 variants are the same but implemented slightly
29299 differently. If this were only called during expand we could just
29300 use the Thumb2 case and let combine do the right thing, but this
29301 can also be called from post-reload splitters. */
29303 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29305 if (!TARGET_THUMB2)
29307 /* Emit code for ARM mode. */
29308 emit_insn (SET (out_down,
29309 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29310 if (code == ASHIFTRT)
29312 rtx_code_label *done_label = gen_label_rtx ();
29313 emit_jump_insn (BRANCH (LT, done_label));
29314 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29315 out_down)));
29316 emit_label (done_label);
29318 else
29319 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29320 out_down)));
29322 else
29324 /* Emit code for Thumb2 mode.
29325 Thumb2 can't do shift and or in one insn. */
29326 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29327 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29329 if (code == ASHIFTRT)
29331 rtx_code_label *done_label = gen_label_rtx ();
29332 emit_jump_insn (BRANCH (LT, done_label));
29333 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29334 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29335 emit_label (done_label);
29337 else
29339 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29340 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29344 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29347 #undef SUB_32
29348 #undef RSB_32
29349 #undef SUB_S_32
29350 #undef SET
29351 #undef SHIFT
29352 #undef LSHIFT
29353 #undef REV_LSHIFT
29354 #undef ORR
29355 #undef BRANCH
29358 /* Returns true if the pattern is a valid symbolic address, which is either a
29359 symbol_ref or (symbol_ref + addend).
29361 According to the ARM ELF ABI, the initial addend of REL-type relocations
29362 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29363 literal field of the instruction as a 16-bit signed value in the range
29364 -32768 <= A < 32768. */
29366 bool
29367 arm_valid_symbolic_address_p (rtx addr)
29369 rtx xop0, xop1 = NULL_RTX;
29370 rtx tmp = addr;
29372 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29373 return true;
29375 /* (const (plus: symbol_ref const_int)) */
29376 if (GET_CODE (addr) == CONST)
29377 tmp = XEXP (addr, 0);
29379 if (GET_CODE (tmp) == PLUS)
29381 xop0 = XEXP (tmp, 0);
29382 xop1 = XEXP (tmp, 1);
29384 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29385 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29388 return false;
29391 /* Returns true if a valid comparison operation and makes
29392 the operands in a form that is valid. */
29393 bool
29394 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29396 enum rtx_code code = GET_CODE (*comparison);
29397 int code_int;
29398 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29399 ? GET_MODE (*op2) : GET_MODE (*op1);
29401 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29403 if (code == UNEQ || code == LTGT)
29404 return false;
29406 code_int = (int)code;
29407 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29408 PUT_CODE (*comparison, (enum rtx_code)code_int);
29410 switch (mode)
29412 case SImode:
29413 if (!arm_add_operand (*op1, mode))
29414 *op1 = force_reg (mode, *op1);
29415 if (!arm_add_operand (*op2, mode))
29416 *op2 = force_reg (mode, *op2);
29417 return true;
29419 case DImode:
29420 if (!cmpdi_operand (*op1, mode))
29421 *op1 = force_reg (mode, *op1);
29422 if (!cmpdi_operand (*op2, mode))
29423 *op2 = force_reg (mode, *op2);
29424 return true;
29426 case HFmode:
29427 if (!TARGET_VFP_FP16INST)
29428 break;
29429 /* FP16 comparisons are done in SF mode. */
29430 mode = SFmode;
29431 *op1 = convert_to_mode (mode, *op1, 1);
29432 *op2 = convert_to_mode (mode, *op2, 1);
29433 /* Fall through. */
29434 case SFmode:
29435 case DFmode:
29436 if (!vfp_compare_operand (*op1, mode))
29437 *op1 = force_reg (mode, *op1);
29438 if (!vfp_compare_operand (*op2, mode))
29439 *op2 = force_reg (mode, *op2);
29440 return true;
29441 default:
29442 break;
29445 return false;
29449 /* Maximum number of instructions to set block of memory. */
29450 static int
29451 arm_block_set_max_insns (void)
29453 if (optimize_function_for_size_p (cfun))
29454 return 4;
29455 else
29456 return current_tune->max_insns_inline_memset;
29459 /* Return TRUE if it's profitable to set block of memory for
29460 non-vectorized case. VAL is the value to set the memory
29461 with. LENGTH is the number of bytes to set. ALIGN is the
29462 alignment of the destination memory in bytes. UNALIGNED_P
29463 is TRUE if we can only set the memory with instructions
29464 meeting alignment requirements. USE_STRD_P is TRUE if we
29465 can use strd to set the memory. */
29466 static bool
29467 arm_block_set_non_vect_profit_p (rtx val,
29468 unsigned HOST_WIDE_INT length,
29469 unsigned HOST_WIDE_INT align,
29470 bool unaligned_p, bool use_strd_p)
29472 int num = 0;
29473 /* For leftovers in bytes of 0-7, we can set the memory block using
29474 strb/strh/str with minimum instruction number. */
29475 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29477 if (unaligned_p)
29479 num = arm_const_inline_cost (SET, val);
29480 num += length / align + length % align;
29482 else if (use_strd_p)
29484 num = arm_const_double_inline_cost (val);
29485 num += (length >> 3) + leftover[length & 7];
29487 else
29489 num = arm_const_inline_cost (SET, val);
29490 num += (length >> 2) + leftover[length & 3];
29493 /* We may be able to combine last pair STRH/STRB into a single STR
29494 by shifting one byte back. */
29495 if (unaligned_access && length > 3 && (length & 3) == 3)
29496 num--;
29498 return (num <= arm_block_set_max_insns ());
29501 /* Return TRUE if it's profitable to set block of memory for
29502 vectorized case. LENGTH is the number of bytes to set.
29503 ALIGN is the alignment of destination memory in bytes.
29504 MODE is the vector mode used to set the memory. */
29505 static bool
29506 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29507 unsigned HOST_WIDE_INT align,
29508 machine_mode mode)
29510 int num;
29511 bool unaligned_p = ((align & 3) != 0);
29512 unsigned int nelt = GET_MODE_NUNITS (mode);
29514 /* Instruction loading constant value. */
29515 num = 1;
29516 /* Instructions storing the memory. */
29517 num += (length + nelt - 1) / nelt;
29518 /* Instructions adjusting the address expression. Only need to
29519 adjust address expression if it's 4 bytes aligned and bytes
29520 leftover can only be stored by mis-aligned store instruction. */
29521 if (!unaligned_p && (length & 3) != 0)
29522 num++;
29524 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29525 if (!unaligned_p && mode == V16QImode)
29526 num--;
29528 return (num <= arm_block_set_max_insns ());
29531 /* Set a block of memory using vectorization instructions for the
29532 unaligned case. We fill the first LENGTH bytes of the memory
29533 area starting from DSTBASE with byte constant VALUE. ALIGN is
29534 the alignment requirement of memory. Return TRUE if succeeded. */
29535 static bool
29536 arm_block_set_unaligned_vect (rtx dstbase,
29537 unsigned HOST_WIDE_INT length,
29538 unsigned HOST_WIDE_INT value,
29539 unsigned HOST_WIDE_INT align)
29541 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29542 rtx dst, mem;
29543 rtx val_elt, val_vec, reg;
29544 rtx rval[MAX_VECT_LEN];
29545 rtx (*gen_func) (rtx, rtx);
29546 machine_mode mode;
29547 unsigned HOST_WIDE_INT v = value;
29548 unsigned int offset = 0;
29549 gcc_assert ((align & 0x3) != 0);
29550 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29551 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29552 if (length >= nelt_v16)
29554 mode = V16QImode;
29555 gen_func = gen_movmisalignv16qi;
29557 else
29559 mode = V8QImode;
29560 gen_func = gen_movmisalignv8qi;
29562 nelt_mode = GET_MODE_NUNITS (mode);
29563 gcc_assert (length >= nelt_mode);
29564 /* Skip if it isn't profitable. */
29565 if (!arm_block_set_vect_profit_p (length, align, mode))
29566 return false;
29568 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29569 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29571 v = sext_hwi (v, BITS_PER_WORD);
29572 val_elt = GEN_INT (v);
29573 for (j = 0; j < nelt_mode; j++)
29574 rval[j] = val_elt;
29576 reg = gen_reg_rtx (mode);
29577 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29578 /* Emit instruction loading the constant value. */
29579 emit_move_insn (reg, val_vec);
29581 /* Handle nelt_mode bytes in a vector. */
29582 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29584 emit_insn ((*gen_func) (mem, reg));
29585 if (i + 2 * nelt_mode <= length)
29587 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29588 offset += nelt_mode;
29589 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29593 /* If there are not less than nelt_v8 bytes leftover, we must be in
29594 V16QI mode. */
29595 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29597 /* Handle (8, 16) bytes leftover. */
29598 if (i + nelt_v8 < length)
29600 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29601 offset += length - i;
29602 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29604 /* We are shifting bytes back, set the alignment accordingly. */
29605 if ((length & 1) != 0 && align >= 2)
29606 set_mem_align (mem, BITS_PER_UNIT);
29608 emit_insn (gen_movmisalignv16qi (mem, reg));
29610 /* Handle (0, 8] bytes leftover. */
29611 else if (i < length && i + nelt_v8 >= length)
29613 if (mode == V16QImode)
29614 reg = gen_lowpart (V8QImode, reg);
29616 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29617 + (nelt_mode - nelt_v8))));
29618 offset += (length - i) + (nelt_mode - nelt_v8);
29619 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29621 /* We are shifting bytes back, set the alignment accordingly. */
29622 if ((length & 1) != 0 && align >= 2)
29623 set_mem_align (mem, BITS_PER_UNIT);
29625 emit_insn (gen_movmisalignv8qi (mem, reg));
29628 return true;
29631 /* Set a block of memory using vectorization instructions for the
29632 aligned case. We fill the first LENGTH bytes of the memory area
29633 starting from DSTBASE with byte constant VALUE. ALIGN is the
29634 alignment requirement of memory. Return TRUE if succeeded. */
29635 static bool
29636 arm_block_set_aligned_vect (rtx dstbase,
29637 unsigned HOST_WIDE_INT length,
29638 unsigned HOST_WIDE_INT value,
29639 unsigned HOST_WIDE_INT align)
29641 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29642 rtx dst, addr, mem;
29643 rtx val_elt, val_vec, reg;
29644 rtx rval[MAX_VECT_LEN];
29645 machine_mode mode;
29646 unsigned HOST_WIDE_INT v = value;
29647 unsigned int offset = 0;
29649 gcc_assert ((align & 0x3) == 0);
29650 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29651 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29652 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29653 mode = V16QImode;
29654 else
29655 mode = V8QImode;
29657 nelt_mode = GET_MODE_NUNITS (mode);
29658 gcc_assert (length >= nelt_mode);
29659 /* Skip if it isn't profitable. */
29660 if (!arm_block_set_vect_profit_p (length, align, mode))
29661 return false;
29663 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29665 v = sext_hwi (v, BITS_PER_WORD);
29666 val_elt = GEN_INT (v);
29667 for (j = 0; j < nelt_mode; j++)
29668 rval[j] = val_elt;
29670 reg = gen_reg_rtx (mode);
29671 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29672 /* Emit instruction loading the constant value. */
29673 emit_move_insn (reg, val_vec);
29675 i = 0;
29676 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29677 if (mode == V16QImode)
29679 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29680 emit_insn (gen_movmisalignv16qi (mem, reg));
29681 i += nelt_mode;
29682 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29683 if (i + nelt_v8 < length && i + nelt_v16 > length)
29685 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29686 offset += length - nelt_mode;
29687 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29688 /* We are shifting bytes back, set the alignment accordingly. */
29689 if ((length & 0x3) == 0)
29690 set_mem_align (mem, BITS_PER_UNIT * 4);
29691 else if ((length & 0x1) == 0)
29692 set_mem_align (mem, BITS_PER_UNIT * 2);
29693 else
29694 set_mem_align (mem, BITS_PER_UNIT);
29696 emit_insn (gen_movmisalignv16qi (mem, reg));
29697 return true;
29699 /* Fall through for bytes leftover. */
29700 mode = V8QImode;
29701 nelt_mode = GET_MODE_NUNITS (mode);
29702 reg = gen_lowpart (V8QImode, reg);
29705 /* Handle 8 bytes in a vector. */
29706 for (; (i + nelt_mode <= length); i += nelt_mode)
29708 addr = plus_constant (Pmode, dst, i);
29709 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29710 emit_move_insn (mem, reg);
29713 /* Handle single word leftover by shifting 4 bytes back. We can
29714 use aligned access for this case. */
29715 if (i + UNITS_PER_WORD == length)
29717 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29718 offset += i - UNITS_PER_WORD;
29719 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29720 /* We are shifting 4 bytes back, set the alignment accordingly. */
29721 if (align > UNITS_PER_WORD)
29722 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29724 emit_move_insn (mem, reg);
29726 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29727 We have to use unaligned access for this case. */
29728 else if (i < length)
29730 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29731 offset += length - nelt_mode;
29732 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29733 /* We are shifting bytes back, set the alignment accordingly. */
29734 if ((length & 1) == 0)
29735 set_mem_align (mem, BITS_PER_UNIT * 2);
29736 else
29737 set_mem_align (mem, BITS_PER_UNIT);
29739 emit_insn (gen_movmisalignv8qi (mem, reg));
29742 return true;
29745 /* Set a block of memory using plain strh/strb instructions, only
29746 using instructions allowed by ALIGN on processor. We fill the
29747 first LENGTH bytes of the memory area starting from DSTBASE
29748 with byte constant VALUE. ALIGN is the alignment requirement
29749 of memory. */
29750 static bool
29751 arm_block_set_unaligned_non_vect (rtx dstbase,
29752 unsigned HOST_WIDE_INT length,
29753 unsigned HOST_WIDE_INT value,
29754 unsigned HOST_WIDE_INT align)
29756 unsigned int i;
29757 rtx dst, addr, mem;
29758 rtx val_exp, val_reg, reg;
29759 machine_mode mode;
29760 HOST_WIDE_INT v = value;
29762 gcc_assert (align == 1 || align == 2);
29764 if (align == 2)
29765 v |= (value << BITS_PER_UNIT);
29767 v = sext_hwi (v, BITS_PER_WORD);
29768 val_exp = GEN_INT (v);
29769 /* Skip if it isn't profitable. */
29770 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29771 align, true, false))
29772 return false;
29774 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29775 mode = (align == 2 ? HImode : QImode);
29776 val_reg = force_reg (SImode, val_exp);
29777 reg = gen_lowpart (mode, val_reg);
29779 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29781 addr = plus_constant (Pmode, dst, i);
29782 mem = adjust_automodify_address (dstbase, mode, addr, i);
29783 emit_move_insn (mem, reg);
29786 /* Handle single byte leftover. */
29787 if (i + 1 == length)
29789 reg = gen_lowpart (QImode, val_reg);
29790 addr = plus_constant (Pmode, dst, i);
29791 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29792 emit_move_insn (mem, reg);
29793 i++;
29796 gcc_assert (i == length);
29797 return true;
29800 /* Set a block of memory using plain strd/str/strh/strb instructions,
29801 to permit unaligned copies on processors which support unaligned
29802 semantics for those instructions. We fill the first LENGTH bytes
29803 of the memory area starting from DSTBASE with byte constant VALUE.
29804 ALIGN is the alignment requirement of memory. */
29805 static bool
29806 arm_block_set_aligned_non_vect (rtx dstbase,
29807 unsigned HOST_WIDE_INT length,
29808 unsigned HOST_WIDE_INT value,
29809 unsigned HOST_WIDE_INT align)
29811 unsigned int i;
29812 rtx dst, addr, mem;
29813 rtx val_exp, val_reg, reg;
29814 unsigned HOST_WIDE_INT v;
29815 bool use_strd_p;
29817 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29818 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29820 v = (value | (value << 8) | (value << 16) | (value << 24));
29821 if (length < UNITS_PER_WORD)
29822 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29824 if (use_strd_p)
29825 v |= (v << BITS_PER_WORD);
29826 else
29827 v = sext_hwi (v, BITS_PER_WORD);
29829 val_exp = GEN_INT (v);
29830 /* Skip if it isn't profitable. */
29831 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29832 align, false, use_strd_p))
29834 if (!use_strd_p)
29835 return false;
29837 /* Try without strd. */
29838 v = (v >> BITS_PER_WORD);
29839 v = sext_hwi (v, BITS_PER_WORD);
29840 val_exp = GEN_INT (v);
29841 use_strd_p = false;
29842 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29843 align, false, use_strd_p))
29844 return false;
29847 i = 0;
29848 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29849 /* Handle double words using strd if possible. */
29850 if (use_strd_p)
29852 val_reg = force_reg (DImode, val_exp);
29853 reg = val_reg;
29854 for (; (i + 8 <= length); i += 8)
29856 addr = plus_constant (Pmode, dst, i);
29857 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29858 emit_move_insn (mem, reg);
29861 else
29862 val_reg = force_reg (SImode, val_exp);
29864 /* Handle words. */
29865 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29866 for (; (i + 4 <= length); i += 4)
29868 addr = plus_constant (Pmode, dst, i);
29869 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29870 if ((align & 3) == 0)
29871 emit_move_insn (mem, reg);
29872 else
29873 emit_insn (gen_unaligned_storesi (mem, reg));
29876 /* Merge last pair of STRH and STRB into a STR if possible. */
29877 if (unaligned_access && i > 0 && (i + 3) == length)
29879 addr = plus_constant (Pmode, dst, i - 1);
29880 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29881 /* We are shifting one byte back, set the alignment accordingly. */
29882 if ((align & 1) == 0)
29883 set_mem_align (mem, BITS_PER_UNIT);
29885 /* Most likely this is an unaligned access, and we can't tell at
29886 compilation time. */
29887 emit_insn (gen_unaligned_storesi (mem, reg));
29888 return true;
29891 /* Handle half word leftover. */
29892 if (i + 2 <= length)
29894 reg = gen_lowpart (HImode, val_reg);
29895 addr = plus_constant (Pmode, dst, i);
29896 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29897 if ((align & 1) == 0)
29898 emit_move_insn (mem, reg);
29899 else
29900 emit_insn (gen_unaligned_storehi (mem, reg));
29902 i += 2;
29905 /* Handle single byte leftover. */
29906 if (i + 1 == length)
29908 reg = gen_lowpart (QImode, val_reg);
29909 addr = plus_constant (Pmode, dst, i);
29910 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29911 emit_move_insn (mem, reg);
29914 return true;
29917 /* Set a block of memory using vectorization instructions for both
29918 aligned and unaligned cases. We fill the first LENGTH bytes of
29919 the memory area starting from DSTBASE with byte constant VALUE.
29920 ALIGN is the alignment requirement of memory. */
29921 static bool
29922 arm_block_set_vect (rtx dstbase,
29923 unsigned HOST_WIDE_INT length,
29924 unsigned HOST_WIDE_INT value,
29925 unsigned HOST_WIDE_INT align)
29927 /* Check whether we need to use unaligned store instruction. */
29928 if (((align & 3) != 0 || (length & 3) != 0)
29929 /* Check whether unaligned store instruction is available. */
29930 && (!unaligned_access || BYTES_BIG_ENDIAN))
29931 return false;
29933 if ((align & 3) == 0)
29934 return arm_block_set_aligned_vect (dstbase, length, value, align);
29935 else
29936 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29939 /* Expand string store operation. Firstly we try to do that by using
29940 vectorization instructions, then try with ARM unaligned access and
29941 double-word store if profitable. OPERANDS[0] is the destination,
29942 OPERANDS[1] is the number of bytes, operands[2] is the value to
29943 initialize the memory, OPERANDS[3] is the known alignment of the
29944 destination. */
29945 bool
29946 arm_gen_setmem (rtx *operands)
29948 rtx dstbase = operands[0];
29949 unsigned HOST_WIDE_INT length;
29950 unsigned HOST_WIDE_INT value;
29951 unsigned HOST_WIDE_INT align;
29953 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29954 return false;
29956 length = UINTVAL (operands[1]);
29957 if (length > 64)
29958 return false;
29960 value = (UINTVAL (operands[2]) & 0xFF);
29961 align = UINTVAL (operands[3]);
29962 if (TARGET_NEON && length >= 8
29963 && current_tune->string_ops_prefer_neon
29964 && arm_block_set_vect (dstbase, length, value, align))
29965 return true;
29967 if (!unaligned_access && (align & 3) != 0)
29968 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29970 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29974 static bool
29975 arm_macro_fusion_p (void)
29977 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
29980 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
29981 for MOVW / MOVT macro fusion. */
29983 static bool
29984 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
29986 /* We are trying to fuse
29987 movw imm / movt imm
29988 instructions as a group that gets scheduled together. */
29990 rtx set_dest = SET_DEST (curr_set);
29992 if (GET_MODE (set_dest) != SImode)
29993 return false;
29995 /* We are trying to match:
29996 prev (movw) == (set (reg r0) (const_int imm16))
29997 curr (movt) == (set (zero_extract (reg r0)
29998 (const_int 16)
29999 (const_int 16))
30000 (const_int imm16_1))
30002 prev (movw) == (set (reg r1)
30003 (high (symbol_ref ("SYM"))))
30004 curr (movt) == (set (reg r0)
30005 (lo_sum (reg r1)
30006 (symbol_ref ("SYM")))) */
30008 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30010 if (CONST_INT_P (SET_SRC (curr_set))
30011 && CONST_INT_P (SET_SRC (prev_set))
30012 && REG_P (XEXP (set_dest, 0))
30013 && REG_P (SET_DEST (prev_set))
30014 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30015 return true;
30018 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30019 && REG_P (SET_DEST (curr_set))
30020 && REG_P (SET_DEST (prev_set))
30021 && GET_CODE (SET_SRC (prev_set)) == HIGH
30022 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30023 return true;
30025 return false;
30028 static bool
30029 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30031 rtx prev_set = single_set (prev);
30032 rtx curr_set = single_set (curr);
30034 if (!prev_set
30035 || !curr_set)
30036 return false;
30038 if (any_condjump_p (curr))
30039 return false;
30041 if (!arm_macro_fusion_p ())
30042 return false;
30044 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30045 && aarch_crypto_can_dual_issue (prev, curr))
30046 return true;
30048 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30049 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30050 return true;
30052 return false;
30055 /* Return true iff the instruction fusion described by OP is enabled. */
30056 bool
30057 arm_fusion_enabled_p (tune_params::fuse_ops op)
30059 return current_tune->fusible_ops & op;
30062 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30064 static unsigned HOST_WIDE_INT
30065 arm_asan_shadow_offset (void)
30067 return HOST_WIDE_INT_1U << 29;
30071 /* This is a temporary fix for PR60655. Ideally we need
30072 to handle most of these cases in the generic part but
30073 currently we reject minus (..) (sym_ref). We try to
30074 ameliorate the case with minus (sym_ref1) (sym_ref2)
30075 where they are in the same section. */
30077 static bool
30078 arm_const_not_ok_for_debug_p (rtx p)
30080 tree decl_op0 = NULL;
30081 tree decl_op1 = NULL;
30083 if (GET_CODE (p) == MINUS)
30085 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30087 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30088 if (decl_op1
30089 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30090 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30092 if ((TREE_CODE (decl_op1) == VAR_DECL
30093 || TREE_CODE (decl_op1) == CONST_DECL)
30094 && (TREE_CODE (decl_op0) == VAR_DECL
30095 || TREE_CODE (decl_op0) == CONST_DECL))
30096 return (get_variable_section (decl_op1, false)
30097 != get_variable_section (decl_op0, false));
30099 if (TREE_CODE (decl_op1) == LABEL_DECL
30100 && TREE_CODE (decl_op0) == LABEL_DECL)
30101 return (DECL_CONTEXT (decl_op1)
30102 != DECL_CONTEXT (decl_op0));
30105 return true;
30109 return false;
30112 /* return TRUE if x is a reference to a value in a constant pool */
30113 extern bool
30114 arm_is_constant_pool_ref (rtx x)
30116 return (MEM_P (x)
30117 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30118 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30121 /* Remember the last target of arm_set_current_function. */
30122 static GTY(()) tree arm_previous_fndecl;
30124 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30126 void
30127 save_restore_target_globals (tree new_tree)
30129 /* If we have a previous state, use it. */
30130 if (TREE_TARGET_GLOBALS (new_tree))
30131 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30132 else if (new_tree == target_option_default_node)
30133 restore_target_globals (&default_target_globals);
30134 else
30136 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30137 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30140 arm_option_params_internal ();
30143 /* Invalidate arm_previous_fndecl. */
30145 void
30146 arm_reset_previous_fndecl (void)
30148 arm_previous_fndecl = NULL_TREE;
30151 /* Establish appropriate back-end context for processing the function
30152 FNDECL. The argument might be NULL to indicate processing at top
30153 level, outside of any function scope. */
30155 static void
30156 arm_set_current_function (tree fndecl)
30158 if (!fndecl || fndecl == arm_previous_fndecl)
30159 return;
30161 tree old_tree = (arm_previous_fndecl
30162 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30163 : NULL_TREE);
30165 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30167 /* If current function has no attributes but previous one did,
30168 use the default node. */
30169 if (! new_tree && old_tree)
30170 new_tree = target_option_default_node;
30172 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30173 the default have been handled by save_restore_target_globals from
30174 arm_pragma_target_parse. */
30175 if (old_tree == new_tree)
30176 return;
30178 arm_previous_fndecl = fndecl;
30180 /* First set the target options. */
30181 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30183 save_restore_target_globals (new_tree);
30186 /* Implement TARGET_OPTION_PRINT. */
30188 static void
30189 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30191 int flags = ptr->x_target_flags;
30192 const struct arm_fpu_desc *fpu_desc = &all_fpus[ptr->x_arm_fpu_index];
30194 fprintf (file, "%*sselected arch %s\n", indent, "",
30195 TARGET_THUMB2_P (flags) ? "thumb2" :
30196 TARGET_THUMB_P (flags) ? "thumb1" :
30197 "arm");
30199 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_desc->name);
30202 /* Hook to determine if one function can safely inline another. */
30204 static bool
30205 arm_can_inline_p (tree caller, tree callee)
30207 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30208 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30210 struct cl_target_option *caller_opts
30211 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30212 : target_option_default_node);
30214 struct cl_target_option *callee_opts
30215 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30216 : target_option_default_node);
30218 const struct arm_fpu_desc *caller_fpu
30219 = &all_fpus[caller_opts->x_arm_fpu_index];
30220 const struct arm_fpu_desc *callee_fpu
30221 = &all_fpus[callee_opts->x_arm_fpu_index];
30223 /* Callee's fpu features should be a subset of the caller's. */
30224 if ((caller_fpu->features & callee_fpu->features) != callee_fpu->features)
30225 return false;
30227 /* Need same FPU regs. */
30228 if (callee_fpu->regs != callee_fpu->regs)
30229 return false;
30231 /* OK to inline between different modes.
30232 Function with mode specific instructions, e.g using asm,
30233 must be explicitly protected with noinline. */
30234 return true;
30237 /* Hook to fix function's alignment affected by target attribute. */
30239 static void
30240 arm_relayout_function (tree fndecl)
30242 if (DECL_USER_ALIGN (fndecl))
30243 return;
30245 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30247 if (!callee_tree)
30248 callee_tree = target_option_default_node;
30250 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30251 SET_DECL_ALIGN (fndecl, FUNCTION_BOUNDARY_P (opts->x_target_flags));
30254 /* Inner function to process the attribute((target(...))), take an argument and
30255 set the current options from the argument. If we have a list, recursively
30256 go over the list. */
30258 static bool
30259 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30261 if (TREE_CODE (args) == TREE_LIST)
30263 bool ret = true;
30265 for (; args; args = TREE_CHAIN (args))
30266 if (TREE_VALUE (args)
30267 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30268 ret = false;
30269 return ret;
30272 else if (TREE_CODE (args) != STRING_CST)
30274 error ("attribute %<target%> argument not a string");
30275 return false;
30278 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30279 char *q;
30281 while ((q = strtok (argstr, ",")) != NULL)
30283 while (ISSPACE (*q)) ++q;
30285 argstr = NULL;
30286 if (!strncmp (q, "thumb", 5))
30287 opts->x_target_flags |= MASK_THUMB;
30289 else if (!strncmp (q, "arm", 3))
30290 opts->x_target_flags &= ~MASK_THUMB;
30292 else if (!strncmp (q, "fpu=", 4))
30294 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30295 &opts->x_arm_fpu_index, CL_TARGET))
30297 error ("invalid fpu for attribute(target(\"%s\"))", q);
30298 return false;
30301 else
30303 error ("attribute(target(\"%s\")) is unknown", q);
30304 return false;
30307 arm_option_check_internal (opts);
30310 return true;
30313 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30315 tree
30316 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30317 struct gcc_options *opts_set)
30319 if (!arm_valid_target_attribute_rec (args, opts))
30320 return NULL_TREE;
30322 /* Do any overrides, such as global options arch=xxx. */
30323 arm_option_override_internal (opts, opts_set);
30325 return build_target_option_node (opts);
30328 static void
30329 add_attribute (const char * mode, tree *attributes)
30331 size_t len = strlen (mode);
30332 tree value = build_string (len, mode);
30334 TREE_TYPE (value) = build_array_type (char_type_node,
30335 build_index_type (size_int (len)));
30337 *attributes = tree_cons (get_identifier ("target"),
30338 build_tree_list (NULL_TREE, value),
30339 *attributes);
30342 /* For testing. Insert thumb or arm modes alternatively on functions. */
30344 static void
30345 arm_insert_attributes (tree fndecl, tree * attributes)
30347 const char *mode;
30349 if (! TARGET_FLIP_THUMB)
30350 return;
30352 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30353 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30354 return;
30356 /* Nested definitions must inherit mode. */
30357 if (current_function_decl)
30359 mode = TARGET_THUMB ? "thumb" : "arm";
30360 add_attribute (mode, attributes);
30361 return;
30364 /* If there is already a setting don't change it. */
30365 if (lookup_attribute ("target", *attributes) != NULL)
30366 return;
30368 mode = thumb_flipper ? "thumb" : "arm";
30369 add_attribute (mode, attributes);
30371 thumb_flipper = !thumb_flipper;
30374 /* Hook to validate attribute((target("string"))). */
30376 static bool
30377 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30378 tree args, int ARG_UNUSED (flags))
30380 bool ret = true;
30381 struct gcc_options func_options;
30382 tree cur_tree, new_optimize;
30383 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30385 /* Get the optimization options of the current function. */
30386 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30388 /* If the function changed the optimization levels as well as setting target
30389 options, start with the optimizations specified. */
30390 if (!func_optimize)
30391 func_optimize = optimization_default_node;
30393 /* Init func_options. */
30394 memset (&func_options, 0, sizeof (func_options));
30395 init_options_struct (&func_options, NULL);
30396 lang_hooks.init_options_struct (&func_options);
30398 /* Initialize func_options to the defaults. */
30399 cl_optimization_restore (&func_options,
30400 TREE_OPTIMIZATION (func_optimize));
30402 cl_target_option_restore (&func_options,
30403 TREE_TARGET_OPTION (target_option_default_node));
30405 /* Set func_options flags with new target mode. */
30406 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30407 &global_options_set);
30409 if (cur_tree == NULL_TREE)
30410 ret = false;
30412 new_optimize = build_optimization_node (&func_options);
30414 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30416 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30418 finalize_options_struct (&func_options);
30420 return ret;
30423 void
30424 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30427 fprintf (stream, "\t.syntax unified\n");
30429 if (TARGET_THUMB)
30431 if (is_called_in_ARM_mode (decl)
30432 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30433 && cfun->is_thunk))
30434 fprintf (stream, "\t.code 32\n");
30435 else if (TARGET_THUMB1)
30436 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30437 else
30438 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30440 else
30441 fprintf (stream, "\t.arm\n");
30443 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30444 TARGET_SOFT_FLOAT ? "softvfp" : TARGET_FPU_NAME);
30446 if (TARGET_POKE_FUNCTION_NAME)
30447 arm_poke_function_name (stream, (const char *) name);
30450 /* If MEM is in the form of [base+offset], extract the two parts
30451 of address and set to BASE and OFFSET, otherwise return false
30452 after clearing BASE and OFFSET. */
30454 static bool
30455 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30457 rtx addr;
30459 gcc_assert (MEM_P (mem));
30461 addr = XEXP (mem, 0);
30463 /* Strip off const from addresses like (const (addr)). */
30464 if (GET_CODE (addr) == CONST)
30465 addr = XEXP (addr, 0);
30467 if (GET_CODE (addr) == REG)
30469 *base = addr;
30470 *offset = const0_rtx;
30471 return true;
30474 if (GET_CODE (addr) == PLUS
30475 && GET_CODE (XEXP (addr, 0)) == REG
30476 && CONST_INT_P (XEXP (addr, 1)))
30478 *base = XEXP (addr, 0);
30479 *offset = XEXP (addr, 1);
30480 return true;
30483 *base = NULL_RTX;
30484 *offset = NULL_RTX;
30486 return false;
30489 /* If INSN is a load or store of address in the form of [base+offset],
30490 extract the two parts and set to BASE and OFFSET. IS_LOAD is set
30491 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
30492 otherwise return FALSE. */
30494 static bool
30495 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30497 rtx x, dest, src;
30499 gcc_assert (INSN_P (insn));
30500 x = PATTERN (insn);
30501 if (GET_CODE (x) != SET)
30502 return false;
30504 src = SET_SRC (x);
30505 dest = SET_DEST (x);
30506 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30508 *is_load = false;
30509 extract_base_offset_in_addr (dest, base, offset);
30511 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30513 *is_load = true;
30514 extract_base_offset_in_addr (src, base, offset);
30516 else
30517 return false;
30519 return (*base != NULL_RTX && *offset != NULL_RTX);
30522 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30524 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
30525 and PRI are only calculated for these instructions. For other instruction,
30526 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kind
30527 instruction fusion can be supported by returning different priorities.
30529 It's important that irrelevant instructions get the largest FUSION_PRI. */
30531 static void
30532 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30533 int *fusion_pri, int *pri)
30535 int tmp, off_val;
30536 bool is_load;
30537 rtx base, offset;
30539 gcc_assert (INSN_P (insn));
30541 tmp = max_pri - 1;
30542 if (!fusion_load_store (insn, &base, &offset, &is_load))
30544 *pri = tmp;
30545 *fusion_pri = tmp;
30546 return;
30549 /* Load goes first. */
30550 if (is_load)
30551 *fusion_pri = tmp - 1;
30552 else
30553 *fusion_pri = tmp - 2;
30555 tmp /= 2;
30557 /* INSN with smaller base register goes first. */
30558 tmp -= ((REGNO (base) & 0xff) << 20);
30560 /* INSN with smaller offset goes first. */
30561 off_val = (int)(INTVAL (offset));
30562 if (off_val >= 0)
30563 tmp -= (off_val & 0xfffff);
30564 else
30565 tmp += ((- off_val) & 0xfffff);
30567 *pri = tmp;
30568 return;
30572 /* Construct and return a PARALLEL RTX vector with elements numbering the
30573 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30574 the vector - from the perspective of the architecture. This does not
30575 line up with GCC's perspective on lane numbers, so we end up with
30576 different masks depending on our target endian-ness. The diagram
30577 below may help. We must draw the distinction when building masks
30578 which select one half of the vector. An instruction selecting
30579 architectural low-lanes for a big-endian target, must be described using
30580 a mask selecting GCC high-lanes.
30582 Big-Endian Little-Endian
30584 GCC 0 1 2 3 3 2 1 0
30585 | x | x | x | x | | x | x | x | x |
30586 Architecture 3 2 1 0 3 2 1 0
30588 Low Mask: { 2, 3 } { 0, 1 }
30589 High Mask: { 0, 1 } { 2, 3 }
30593 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30595 int nunits = GET_MODE_NUNITS (mode);
30596 rtvec v = rtvec_alloc (nunits / 2);
30597 int high_base = nunits / 2;
30598 int low_base = 0;
30599 int base;
30600 rtx t1;
30601 int i;
30603 if (BYTES_BIG_ENDIAN)
30604 base = high ? low_base : high_base;
30605 else
30606 base = high ? high_base : low_base;
30608 for (i = 0; i < nunits / 2; i++)
30609 RTVEC_ELT (v, i) = GEN_INT (base + i);
30611 t1 = gen_rtx_PARALLEL (mode, v);
30612 return t1;
30615 /* Check OP for validity as a PARALLEL RTX vector with elements
30616 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30617 from the perspective of the architecture. See the diagram above
30618 arm_simd_vect_par_cnst_half_p for more details. */
30620 bool
30621 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30622 bool high)
30624 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30625 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30626 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30627 int i = 0;
30629 if (!VECTOR_MODE_P (mode))
30630 return false;
30632 if (count_op != count_ideal)
30633 return false;
30635 for (i = 0; i < count_ideal; i++)
30637 rtx elt_op = XVECEXP (op, 0, i);
30638 rtx elt_ideal = XVECEXP (ideal, 0, i);
30640 if (!CONST_INT_P (elt_op)
30641 || INTVAL (elt_ideal) != INTVAL (elt_op))
30642 return false;
30644 return true;
30647 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30648 in Thumb1. */
30649 static bool
30650 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30651 const_tree)
30653 /* For now, we punt and not handle this for TARGET_THUMB1. */
30654 if (vcall_offset && TARGET_THUMB1)
30655 return false;
30657 /* Otherwise ok. */
30658 return true;
30661 /* Generate RTL for a conditional branch with rtx comparison CODE in
30662 mode CC_MODE. The destination of the unlikely conditional branch
30663 is LABEL_REF. */
30665 void
30666 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30667 rtx label_ref)
30669 rtx x;
30670 x = gen_rtx_fmt_ee (code, VOIDmode,
30671 gen_rtx_REG (cc_mode, CC_REGNUM),
30672 const0_rtx);
30674 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30675 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30676 pc_rtx);
30677 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30680 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30682 For pure-code sections there is no letter code for this attribute, so
30683 output all the section flags numerically when this is needed. */
30685 static bool
30686 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30689 if (flags & SECTION_ARM_PURECODE)
30691 *num = 0x20000000;
30693 if (!(flags & SECTION_DEBUG))
30694 *num |= 0x2;
30695 if (flags & SECTION_EXCLUDE)
30696 *num |= 0x80000000;
30697 if (flags & SECTION_WRITE)
30698 *num |= 0x1;
30699 if (flags & SECTION_CODE)
30700 *num |= 0x4;
30701 if (flags & SECTION_MERGE)
30702 *num |= 0x10;
30703 if (flags & SECTION_STRINGS)
30704 *num |= 0x20;
30705 if (flags & SECTION_TLS)
30706 *num |= 0x400;
30707 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
30708 *num |= 0x200;
30710 return true;
30713 return false;
30716 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
30718 If pure-code is passed as an option, make sure all functions are in
30719 sections that have the SHF_ARM_PURECODE attribute. */
30721 static section *
30722 arm_function_section (tree decl, enum node_frequency freq,
30723 bool startup, bool exit)
30725 const char * section_name;
30726 section * sec;
30728 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
30729 return default_function_section (decl, freq, startup, exit);
30731 if (!target_pure_code)
30732 return default_function_section (decl, freq, startup, exit);
30735 section_name = DECL_SECTION_NAME (decl);
30737 /* If a function is not in a named section then it falls under the 'default'
30738 text section, also known as '.text'. We can preserve previous behavior as
30739 the default text section already has the SHF_ARM_PURECODE section
30740 attribute. */
30741 if (!section_name)
30743 section *default_sec = default_function_section (decl, freq, startup,
30744 exit);
30746 /* If default_sec is not null, then it must be a special section like for
30747 example .text.startup. We set the pure-code attribute and return the
30748 same section to preserve existing behavior. */
30749 if (default_sec)
30750 default_sec->common.flags |= SECTION_ARM_PURECODE;
30751 return default_sec;
30754 /* Otherwise look whether a section has already been created with
30755 'section_name'. */
30756 sec = get_named_section (decl, section_name, 0);
30757 if (!sec)
30758 /* If that is not the case passing NULL as the section's name to
30759 'get_named_section' will create a section with the declaration's
30760 section name. */
30761 sec = get_named_section (decl, NULL, 0);
30763 /* Set the SHF_ARM_PURECODE attribute. */
30764 sec->common.flags |= SECTION_ARM_PURECODE;
30766 return sec;
30769 /* Implements the TARGET_SECTION_FLAGS hook.
30771 If DECL is a function declaration and pure-code is passed as an option
30772 then add the SFH_ARM_PURECODE attribute to the section flags. NAME is the
30773 section's name and RELOC indicates whether the declarations initializer may
30774 contain runtime relocations. */
30776 static unsigned int
30777 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
30779 unsigned int flags = default_section_type_flags (decl, name, reloc);
30781 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
30782 flags |= SECTION_ARM_PURECODE;
30784 return flags;
30787 #include "gt-arm.h"